linux/net/packet/internal.h
Stanislav Fomichev d8d85ef0a6 af_packet: move notifier's packet_dev_mc out of rcu critical section
Syzkaller reports the following issue:

 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:578
 __mutex_lock+0x106/0xe80 kernel/locking/mutex.c:746
 team_change_rx_flags+0x38/0x220 drivers/net/team/team_core.c:1781
 dev_change_rx_flags net/core/dev.c:9145 [inline]
 __dev_set_promiscuity+0x3f8/0x590 net/core/dev.c:9189
 netif_set_promiscuity+0x50/0xe0 net/core/dev.c:9201
 dev_set_promiscuity+0x126/0x260 net/core/dev_api.c:286
 packet_dev_mc net/packet/af_packet.c:3698 [inline]
 packet_dev_mclist_delete net/packet/af_packet.c:3722 [inline]
 packet_notifier+0x292/0xa60 net/packet/af_packet.c:4247
 notifier_call_chain+0x1b3/0x3e0 kernel/notifier.c:85
 call_netdevice_notifiers_extack net/core/dev.c:2214 [inline]
 call_netdevice_notifiers net/core/dev.c:2228 [inline]
 unregister_netdevice_many_notify+0x15d8/0x2330 net/core/dev.c:11972
 rtnl_delete_link net/core/rtnetlink.c:3522 [inline]
 rtnl_dellink+0x488/0x710 net/core/rtnetlink.c:3564
 rtnetlink_rcv_msg+0x7cf/0xb70 net/core/rtnetlink.c:6955
 netlink_rcv_skb+0x219/0x490 net/netlink/af_netlink.c:2534

Calling `PACKET_ADD_MEMBERSHIP` on an ops-locked device can trigger
the `NETDEV_UNREGISTER` notifier, which may require disabling promiscuous
and/or allmulti mode. Both of these operations require acquiring
the netdev instance lock.

Move the call to `packet_dev_mc` outside of the RCU critical section.
The `mclist` modifications (add, del, flush, unregister) are protected by
the RTNL, not the RCU. The RCU only protects the `sklist` and its
associated `sks`. The delayed operation on the `mclist` entry remains
within the RTNL.

Reported-by: syzbot+b191b5ccad8d7a986286@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=b191b5ccad8d7a986286
Fixes: ad7c7b2172 ("net: hold netdev instance lock during sysfs operations")
Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250522031129.3247266-1-stfomichev@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-05-27 11:36:26 +02:00

165 lines
3.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PACKET_INTERNAL_H__
#define __PACKET_INTERNAL_H__
#include <linux/refcount.h>
/*
 * One node of a singly linked chain (linked through next). struct packet_sock
 * keeps a pointer to such a node in its mclist field.
 *
 * NOTE(review): the per-field meanings marked "presumably" are inferred from
 * the field names only -- confirm against the users in af_packet.c.
 */
struct packet_mclist {
struct packet_mclist *next; /* next node in the chain */
int ifindex; /* presumably the index of the device this entry refers to */
int count; /* presumably a use count for this entry -- TODO confirm */
unsigned short type;
unsigned short alen; /* presumably the number of valid bytes in addr */
unsigned char addr[MAX_ADDR_LEN]; /* address bytes, at most MAX_ADDR_LEN */
struct list_head remove_list; /* list linkage; presumably queues the entry for removal -- confirm */
};
/*
 * kbdq - kernel block descriptor queue
 *
 * Bookkeeping for the block descriptor queue. An instance is embedded in
 * struct packet_ring_buffer as prb_bdqc.
 */
struct tpacket_kbdq_core {
struct pgv *pkbdq;
unsigned int feature_req_word;
unsigned int hdrlen;
unsigned char reset_pending_on_curr_blk;
unsigned char delete_blk_timer;
unsigned short kactive_blk_num;
unsigned short blk_sizeof_priv;
/*
 * last_kactive_blk_num:
 * trick to see if user-space has caught up
 * in order to avoid refreshing timer when every single pkt arrives.
 */
unsigned short last_kactive_blk_num;
char *pkblk_start;
char *pkblk_end;
int kblk_size;
unsigned int max_frame_len;
unsigned int knum_blocks;
uint64_t knxt_seq_num;
char *prev;
char *nxt_offset;
struct sk_buff *skb;
rwlock_t blk_fill_in_prog_lock;
/* Default is set to 8ms */
#define DEFAULT_PRB_RETIRE_TOV (8)
/* NOTE(review): presumably the retire timeout the default above applies to -- confirm */
unsigned short retire_blk_tov;
unsigned short version;
/* NOTE(review): name suggests the same timeout expressed in jiffies -- confirm */
unsigned long tov_in_jiffies;
/* timer to retire an outstanding block */
struct timer_list retire_blk_timer;
};
/*
 * Wrapper around a single buffer pointer. Referenced through the pg_vec
 * pointer in struct packet_ring_buffer and the pkbdq pointer in
 * struct tpacket_kbdq_core.
 */
struct pgv {
char *buffer;
};
/*
 * Description of one ring buffer. struct packet_sock embeds two of these,
 * rx_ring and tx_ring.
 */
struct packet_ring_buffer {
struct pgv *pg_vec;
unsigned int head;
unsigned int frames_per_block;
unsigned int frame_size;
unsigned int frame_max;
unsigned int pg_vec_order;
unsigned int pg_vec_pages;
unsigned int pg_vec_len;
unsigned int __percpu *pending_refcnt; /* per-CPU counter (see __percpu annotation) */
/*
 * Anonymous union: rx_owner_map and prb_bdqc occupy the same storage, so
 * only one of them is meaningful for a given ring.
 * NOTE(review): which member applies presumably depends on the ring type
 * and TPACKET version -- confirm in af_packet.c.
 */
union {
unsigned long *rx_owner_map;
struct tpacket_kbdq_core prb_bdqc;
};
};
/* Declared here only; the mutex itself is defined in another translation unit. */
extern struct mutex fanout_mutex;
/*
 * 1 << 16 == 65536.
 * NOTE(review): presumably an upper bound related to fanout membership --
 * confirm at the use sites.
 */
#define PACKET_FANOUT_MAX (1 << 16)
/*
 * State for one packet fanout instance; struct packet_sock points to one
 * through its fanout field.
 *
 * NOTE(review): per-field semantics not spelled out below are not visible in
 * this header -- confirm in af_packet.c before relying on them.
 */
struct packet_fanout {
possible_net_t net;
unsigned int num_members;
u32 max_num_members; /* element count of arr[] (named by __counted_by below) */
u16 id;
u8 type;
u8 flags;
/* rr_cur and bpf_prog share storage; only one is meaningful at a time */
union {
atomic_t rr_cur;
struct bpf_prog __rcu *bpf_prog; /* RCU-annotated pointer */
};
struct list_head list;
spinlock_t lock;
refcount_t sk_ref;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
/* flexible array member of RCU-annotated socket pointers; must stay last */
struct sock __rcu *arr[] __counted_by(max_num_members);
};
/*
 * Rollover state; struct packet_sock points to one through its rollover
 * field. The whole structure is cacheline aligned on SMP builds.
 *
 * NOTE(review): the three atomic counters look like statistics, judging by
 * their names -- confirm what each one counts in af_packet.c.
 */
struct packet_rollover {
int sock;
atomic_long_t num;
atomic_long_t num_huge;
atomic_long_t num_failed;
/* number of u32 slots that fit in L1_CACHE_BYTES bytes */
#define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32))
/* ROLLOVER_HLEN u32 entries, i.e. L1_CACHE_BYTES bytes, cacheline aligned */
u32 history[ROLLOVER_HLEN] ____cacheline_aligned;
} ____cacheline_aligned_in_smp;
/*
 * Per-socket state for a packet socket. It embeds struct sock as its first
 * member, which is what lets pkt_sk() convert a struct sock pointer back to
 * the containing packet_sock.
 */
struct packet_sock {
/* struct sock has to be the first member of packet_sock */
struct sock sk;
struct packet_fanout *fanout;
union tpacket_stats_u stats;
struct packet_ring_buffer rx_ring;
struct packet_ring_buffer tx_ring;
int copy_thresh;
spinlock_t bind_lock;
struct mutex pg_vec_lock;
/*
 * Bitmap indexed by enum packet_sock_flags; written with
 * packet_sock_flag_set() and read with packet_sock_flag().
 */
unsigned long flags;
int ifindex; /* bound device */
u8 vnet_hdr_sz;
__be16 num; /* big-endian value (__be16) */
struct packet_rollover *rollover;
struct packet_mclist *mclist; /* pointer into a chain of struct packet_mclist nodes */
atomic_long_t mapped;
enum tpacket_versions tp_version;
unsigned int tp_hdrlen;
unsigned int tp_reserve;
unsigned int tp_tstamp;
struct completion skb_completion;
struct net_device __rcu *cached_dev; /* RCU-annotated pointer */
/* the two members below each start on their own cache line on SMP builds */
struct packet_type prot_hook ____cacheline_aligned_in_smp;
atomic_t tp_drops ____cacheline_aligned_in_smp;
};
/*
 * Convert a pointer to the embedded struct sock (member sk) into a pointer to
 * the enclosing struct packet_sock, using container_of_const().
 */
#define pkt_sk(ptr) container_of_const(ptr, struct packet_sock, sk)
/*
 * Bit numbers within packet_sock::flags. Each enumerator is a bit index (not
 * a mask) and is passed to packet_sock_flag_set() / packet_sock_flag().
 * Values are written out explicitly; they match the implicit numbering.
 */
enum packet_sock_flags {
	PACKET_SOCK_ORIGDEV      = 0,
	PACKET_SOCK_AUXDATA      = 1,
	PACKET_SOCK_TX_HAS_OFF   = 2,
	PACKET_SOCK_TP_LOSS      = 3,
	PACKET_SOCK_RUNNING      = 4,
	PACKET_SOCK_PRESSURE     = 5,
	PACKET_SOCK_QDISC_BYPASS = 6,
};
/*
 * Set or clear one flag bit of a packet socket.
 *
 * @po:   socket whose flags word is updated
 * @flag: bit number to change
 * @val:  true sets the bit (set_bit), false clears it (clear_bit)
 */
static inline void packet_sock_flag_set(struct packet_sock *po,
					enum packet_sock_flags flag,
					bool val)
{
	unsigned long *bits = &po->flags;

	if (!val) {
		clear_bit(flag, bits);
		return;
	}

	set_bit(flag, bits);
}
/*
 * Query one flag bit of a packet socket.
 *
 * @po:   socket whose flags word is inspected (not modified)
 * @flag: bit number to test
 *
 * Returns the result of test_bit() for that bit: true when it is set.
 */
static inline bool packet_sock_flag(const struct packet_sock *po,
				    enum packet_sock_flags flag)
{
	const unsigned long *bits = &po->flags;

	return test_bit(flag, bits);
}
#endif