linux/net/core/netdev-genl.c
Samiullah Khawaja 2677010e77 Add support to set NAPI threaded for individual NAPI
A net device has a threaded sysctl that can be used to enable threaded
NAPI polling on all of the NAPI contexts under that device. Allow
enabling threaded NAPI polling at individual NAPI level using netlink.

Extend the netlink operation `napi-set` and allow setting the threaded
attribute of a NAPI. This will enable the threaded polling on a NAPI
context.

Add a test in `nl_netdev.py` that verifies various cases of threaded
NAPI being set at NAPI and at device level.

Tested
 ./tools/testing/selftests/net/nl_netdev.py
 TAP version 13
 1..7
 ok 1 nl_netdev.empty_check
 ok 2 nl_netdev.lo_check
 ok 3 nl_netdev.page_pool_check
 ok 4 nl_netdev.napi_list_check
 ok 5 nl_netdev.dev_set_threaded
 ok 6 nl_netdev.napi_set_threaded
 ok 7 nl_netdev.nsim_rxq_reset_down
 # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0

Signed-off-by: Samiullah Khawaja <skhawaja@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250710211203.3979655-1-skhawaja@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-07-14 18:02:37 -07:00

1139 lines
28 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/busy_poll.h>
#include <net/net_namespace.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
#include <net/page_pool/memory_provider.h>
#include "dev.h"
#include "devmem.h"
#include "netdev-genl-gen.h"
struct netdev_nl_dump_ctx {
unsigned long ifindex;
unsigned int rxq_idx;
unsigned int txq_idx;
unsigned int napi_id;
};
static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
{
NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx);
return (struct netdev_nl_dump_ctx *)cb->ctx;
}
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info)
{
u64 xsk_features = 0;
u64 xdp_rx_meta = 0;
void *hdr;
netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */
hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
#define XDP_METADATA_KFUNC(_, flag, __, xmo) \
if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \
xdp_rx_meta |= flag;
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
if (netdev->xsk_tx_metadata_ops) {
if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
if (netdev->xsk_tx_metadata_ops->tmo_request_launch_time)
xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO;
}
if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
netdev->xdp_features, NETDEV_A_DEV_PAD) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
xdp_rx_meta, NETDEV_A_DEV_PAD) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
xsk_features, NETDEV_A_DEV_PAD))
goto err_cancel_msg;
if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
netdev->xdp_zc_max_segs))
goto err_cancel_msg;
}
genlmsg_end(rsp, hdr);
return 0;
err_cancel_msg:
genlmsg_cancel(rsp, hdr);
return -EMSGSIZE;
}
static void
netdev_genl_dev_notify(struct net_device *netdev, int cmd)
{
struct genl_info info;
struct sk_buff *ntf;
if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
NETDEV_NLGRP_MGMT))
return;
genl_info_init_ntf(&info, &netdev_nl_family, cmd);
ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!ntf)
return;
if (netdev_nl_dev_fill(netdev, ntf, &info)) {
nlmsg_free(ntf);
return;
}
genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf,
0, NETDEV_NLGRP_MGMT, GFP_KERNEL);
}
int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net_device *netdev;
struct sk_buff *rsp;
u32 ifindex;
int err;
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX))
return -EINVAL;
ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!rsp)
return -ENOMEM;
netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
if (!netdev) {
err = -ENODEV;
goto err_free_msg;
}
err = netdev_nl_dev_fill(netdev, rsp, info);
netdev_unlock(netdev);
if (err)
goto err_free_msg;
return genlmsg_reply(rsp, info);
err_free_msg:
nlmsg_free(rsp);
return err;
}
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
struct net *net = sock_net(skb->sk);
int err;
for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
if (err < 0)
return err;
}
return 0;
}
static int
netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
const struct genl_info *info)
{
unsigned long irq_suspend_timeout;
unsigned long gro_flush_timeout;
u32 napi_defer_hard_irqs;
void *hdr;
pid_t pid;
if (!napi->dev->up)
return 0;
hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
goto nla_put_failure;
if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
goto nla_put_failure;
if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
goto nla_put_failure;
if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED,
napi_get_threaded(napi)))
goto nla_put_failure;
if (napi->thread) {
pid = task_pid_nr(napi->thread);
if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
goto nla_put_failure;
}
napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi);
if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS,
napi_defer_hard_irqs))
goto nla_put_failure;
irq_suspend_timeout = napi_get_irq_suspend_timeout(napi);
if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
irq_suspend_timeout))
goto nla_put_failure;
gro_flush_timeout = napi_get_gro_flush_timeout(napi);
if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
gro_flush_timeout))
goto nla_put_failure;
genlmsg_end(rsp, hdr);
return 0;
nla_put_failure:
genlmsg_cancel(rsp, hdr);
return -EMSGSIZE;
}
int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct napi_struct *napi;
struct sk_buff *rsp;
u32 napi_id;
int err;
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
return -EINVAL;
napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!rsp)
return -ENOMEM;
napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
netdev_unlock(napi->dev);
} else {
NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
err = -ENOENT;
}
if (err) {
goto err_free_msg;
} else if (!rsp->len) {
err = -ENOENT;
goto err_free_msg;
}
return genlmsg_reply(rsp, info);
err_free_msg:
nlmsg_free(rsp);
return err;
}
static int
netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info,
struct netdev_nl_dump_ctx *ctx)
{
struct napi_struct *napi;
unsigned int prev_id;
int err = 0;
if (!netdev->up)
return err;
prev_id = UINT_MAX;
list_for_each_entry(napi, &netdev->napi_list, dev_list) {
if (!napi_id_valid(napi->napi_id))
continue;
/* Dump continuation below depends on the list being sorted */
WARN_ON_ONCE(napi->napi_id >= prev_id);
prev_id = napi->napi_id;
if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
continue;
err = netdev_nl_napi_fill_one(rsp, napi, info);
if (err)
return err;
ctx->napi_id = napi->napi_id;
}
return err;
}
int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
const struct genl_info *info = genl_info_dump(cb);
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
u32 ifindex = 0;
int err = 0;
if (info->attrs[NETDEV_A_NAPI_IFINDEX])
ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
if (ifindex) {
netdev = netdev_get_by_index_lock(net, ifindex);
if (netdev) {
err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
netdev_unlock(netdev);
} else {
err = -ENODEV;
}
} else {
for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
if (err < 0)
break;
ctx->napi_id = 0;
}
}
return err;
}
static int
netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
{
u64 irq_suspend_timeout = 0;
u64 gro_flush_timeout = 0;
u8 threaded = 0;
u32 defer = 0;
if (info->attrs[NETDEV_A_NAPI_THREADED]) {
int ret;
threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]);
ret = napi_set_threaded(napi, !!threaded);
if (ret)
return ret;
}
if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
napi_set_defer_hard_irqs(napi, defer);
}
if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) {
irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]);
napi_set_irq_suspend_timeout(napi, irq_suspend_timeout);
}
if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
napi_set_gro_flush_timeout(napi, gro_flush_timeout);
}
return 0;
}
int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct napi_struct *napi;
unsigned int napi_id;
int err;
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
return -EINVAL;
napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_set_config(napi, info);
netdev_unlock(napi->dev);
} else {
NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
err = -ENOENT;
}
return err;
}
static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi)
{
if (napi && napi_id_valid(napi->napi_id))
return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id);
return 0;
}
static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
struct pp_memory_provider_params *params;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
void *hdr;
hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) ||
nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) ||
nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex))
goto nla_put_failure;
switch (q_type) {
case NETDEV_QUEUE_TYPE_RX:
rxq = __netif_get_rx_queue(netdev, q_idx);
if (nla_put_napi_id(rsp, rxq->napi))
goto nla_put_failure;
params = &rxq->mp_params;
if (params->mp_ops &&
params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
if (rxq->pool)
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
goto nla_put_failure;
#endif
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
if (nla_put_napi_id(rsp, txq->napi))
goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
if (txq->pool)
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
goto nla_put_failure;
#endif
break;
}
genlmsg_end(rsp, hdr);
return 0;
nla_put_failure:
genlmsg_cancel(rsp, hdr);
return -EMSGSIZE;
}
static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id,
u32 q_type)
{
switch (q_type) {
case NETDEV_QUEUE_TYPE_RX:
if (q_id >= netdev->real_num_rx_queues)
return -EINVAL;
return 0;
case NETDEV_QUEUE_TYPE_TX:
if (q_id >= netdev->real_num_tx_queues)
return -EINVAL;
}
return 0;
}
static int
netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
u32 q_type, const struct genl_info *info)
{
int err;
if (!netdev->up)
return -ENOENT;
err = netdev_nl_queue_validate(netdev, q_idx, q_type);
if (err)
return err;
return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info);
}
int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
{
u32 q_id, q_type, ifindex;
struct net_device *netdev;
struct sk_buff *rsp;
int err;
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) ||
GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX))
return -EINVAL;
q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]);
q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]);
ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!rsp)
return -ENOMEM;
netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info),
ifindex);
if (netdev) {
err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
netdev_unlock_ops_compat(netdev);
} else {
err = -ENODEV;
}
if (err)
goto err_free_msg;
return genlmsg_reply(rsp, info);
err_free_msg:
nlmsg_free(rsp);
return err;
}
static int
netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info,
struct netdev_nl_dump_ctx *ctx)
{
int err = 0;
if (!netdev->up)
return err;
for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx,
NETDEV_QUEUE_TYPE_RX, info);
if (err)
return err;
}
for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) {
err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx,
NETDEV_QUEUE_TYPE_TX, info);
if (err)
return err;
}
return err;
}
int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
const struct genl_info *info = genl_info_dump(cb);
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
u32 ifindex = 0;
int err = 0;
if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
if (ifindex) {
netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
if (netdev) {
err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
netdev_unlock_ops_compat(netdev);
} else {
err = -ENODEV;
}
} else {
for_each_netdev_lock_ops_compat_scoped(net, netdev,
ctx->ifindex) {
err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
if (err < 0)
break;
ctx->rxq_idx = 0;
ctx->txq_idx = 0;
}
}
return err;
}
#define NETDEV_STAT_NOT_SET (~0ULL)
static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size)
{
const u64 *add = _add;
u64 *sum = _sum;
while (size) {
if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET)
*sum += *add;
sum++;
add++;
size -= 8;
}
}
static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value)
{
if (value == NETDEV_STAT_NOT_SET)
return 0;
return nla_put_uint(rsp, attr_id, value);
}
static int
netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
{
if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits))
return -EMSGSIZE;
return 0;
}
static int
netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
{
if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake))
return -EMSGSIZE;
return 0;
}
static int
netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp,
u32 q_type, int i, const struct genl_info *info)
{
const struct netdev_stat_ops *ops = netdev->stat_ops;
struct netdev_queue_stats_rx rx;
struct netdev_queue_stats_tx tx;
void *hdr;
hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) ||
nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) ||
nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i))
goto nla_put_failure;
switch (q_type) {
case NETDEV_QUEUE_TYPE_RX:
memset(&rx, 0xff, sizeof(rx));
ops->get_queue_stats_rx(netdev, i, &rx);
if (!memchr_inv(&rx, 0xff, sizeof(rx)))
goto nla_cancel;
if (netdev_nl_stats_write_rx(rsp, &rx))
goto nla_put_failure;
break;
case NETDEV_QUEUE_TYPE_TX:
memset(&tx, 0xff, sizeof(tx));
ops->get_queue_stats_tx(netdev, i, &tx);
if (!memchr_inv(&tx, 0xff, sizeof(tx)))
goto nla_cancel;
if (netdev_nl_stats_write_tx(rsp, &tx))
goto nla_put_failure;
break;
}
genlmsg_end(rsp, hdr);
return 0;
nla_cancel:
genlmsg_cancel(rsp, hdr);
return 0;
nla_put_failure:
genlmsg_cancel(rsp, hdr);
return -EMSGSIZE;
}
static int
netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info,
struct netdev_nl_dump_ctx *ctx)
{
const struct netdev_stat_ops *ops = netdev->stat_ops;
int i, err;
if (!(netdev->flags & IFF_UP))
return 0;
i = ctx->rxq_idx;
while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) {
err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX,
i, info);
if (err)
return err;
ctx->rxq_idx = ++i;
}
i = ctx->txq_idx;
while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) {
err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX,
i, info);
if (err)
return err;
ctx->txq_idx = ++i;
}
ctx->rxq_idx = 0;
ctx->txq_idx = 0;
return 0;
}
/**
* netdev_stat_queue_sum() - add up queue stats from range of queues
* @netdev: net_device
* @rx_start: index of the first Rx queue to query
* @rx_end: index after the last Rx queue (first *not* to query)
* @rx_sum: output Rx stats, should be already initialized
* @tx_start: index of the first Tx queue to query
* @tx_end: index after the last Tx queue (first *not* to query)
* @tx_sum: output Tx stats, should be already initialized
*
* Add stats from [start, end) range of queue IDs to *x_sum structs.
* The sum structs must be already initialized. Usually this
* helper is invoked from the .get_base_stats callbacks of drivers
* to account for stats of disabled queues. In that case the ranges
* are usually [netdev->real_num_*x_queues, netdev->num_*x_queues).
*/
void netdev_stat_queue_sum(struct net_device *netdev,
int rx_start, int rx_end,
struct netdev_queue_stats_rx *rx_sum,
int tx_start, int tx_end,
struct netdev_queue_stats_tx *tx_sum)
{
const struct netdev_stat_ops *ops;
struct netdev_queue_stats_rx rx;
struct netdev_queue_stats_tx tx;
int i;
ops = netdev->stat_ops;
for (i = rx_start; i < rx_end; i++) {
memset(&rx, 0xff, sizeof(rx));
if (ops->get_queue_stats_rx)
ops->get_queue_stats_rx(netdev, i, &rx);
netdev_nl_stats_add(rx_sum, &rx, sizeof(rx));
}
for (i = tx_start; i < tx_end; i++) {
memset(&tx, 0xff, sizeof(tx));
if (ops->get_queue_stats_tx)
ops->get_queue_stats_tx(netdev, i, &tx);
netdev_nl_stats_add(tx_sum, &tx, sizeof(tx));
}
}
EXPORT_SYMBOL(netdev_stat_queue_sum);
static int
netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info)
{
struct netdev_queue_stats_rx rx_sum;
struct netdev_queue_stats_tx tx_sum;
void *hdr;
/* Netdev can't guarantee any complete counters */
if (!netdev->stat_ops->get_base_stats)
return 0;
memset(&rx_sum, 0xff, sizeof(rx_sum));
memset(&tx_sum, 0xff, sizeof(tx_sum));
netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum);
/* The op was there, but nothing reported, don't bother */
if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) &&
!memchr_inv(&tx_sum, 0xff, sizeof(tx_sum)))
return 0;
hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
goto nla_put_failure;
netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum,
0, netdev->real_num_tx_queues, &tx_sum);
if (netdev_nl_stats_write_rx(rsp, &rx_sum) ||
netdev_nl_stats_write_tx(rsp, &tx_sum))
goto nla_put_failure;
genlmsg_end(rsp, hdr);
return 0;
nla_put_failure:
genlmsg_cancel(rsp, hdr);
return -EMSGSIZE;
}
static int
netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope,
struct sk_buff *skb, const struct genl_info *info,
struct netdev_nl_dump_ctx *ctx)
{
if (!netdev->stat_ops)
return 0;
switch (scope) {
case 0:
return netdev_nl_stats_by_netdev(netdev, skb, info);
case NETDEV_QSTATS_SCOPE_QUEUE:
return netdev_nl_stats_by_queue(netdev, skb, info, ctx);
}
return -EINVAL; /* Should not happen, per netlink policy */
}
int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
const struct genl_info *info = genl_info_dump(cb);
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
unsigned int ifindex;
unsigned int scope;
int err = 0;
scope = 0;
if (info->attrs[NETDEV_A_QSTATS_SCOPE])
scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);
ifindex = 0;
if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);
if (ifindex) {
netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
if (!netdev) {
NL_SET_BAD_ATTR(info->extack,
info->attrs[NETDEV_A_QSTATS_IFINDEX]);
return -ENODEV;
}
if (netdev->stat_ops) {
err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
info, ctx);
} else {
NL_SET_BAD_ATTR(info->extack,
info->attrs[NETDEV_A_QSTATS_IFINDEX]);
err = -EOPNOTSUPP;
}
netdev_unlock_ops_compat(netdev);
return err;
}
for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) {
err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
info, ctx);
if (err < 0)
break;
}
return err;
}
int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
struct net_devmem_dmabuf_binding *binding;
u32 ifindex, dmabuf_fd, rxq_idx;
struct netdev_nl_sock *priv;
struct net_device *netdev;
struct sk_buff *rsp;
struct nlattr *attr;
int rem, err = 0;
void *hdr;
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) ||
GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES))
return -EINVAL;
ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
if (IS_ERR(priv))
return PTR_ERR(priv);
rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!rsp)
return -ENOMEM;
hdr = genlmsg_iput(rsp, info);
if (!hdr) {
err = -EMSGSIZE;
goto err_genlmsg_free;
}
mutex_lock(&priv->lock);
err = 0;
netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
if (!netdev) {
err = -ENODEV;
goto err_unlock_sock;
}
if (!netif_device_present(netdev))
err = -ENODEV;
else if (!netdev_need_ops_lock(netdev))
err = -EOPNOTSUPP;
if (err) {
NL_SET_BAD_ATTR(info->extack,
info->attrs[NETDEV_A_DEV_IFINDEX]);
goto err_unlock;
}
binding = net_devmem_bind_dmabuf(netdev, DMA_FROM_DEVICE, dmabuf_fd,
priv, info->extack);
if (IS_ERR(binding)) {
err = PTR_ERR(binding);
goto err_unlock;
}
nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
genlmsg_data(info->genlhdr),
genlmsg_len(info->genlhdr), rem) {
err = nla_parse_nested(
tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr,
netdev_queue_id_nl_policy, info->extack);
if (err < 0)
goto err_unbind;
if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) {
err = -EINVAL;
goto err_unbind;
}
if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
err = -EINVAL;
goto err_unbind;
}
rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
info->extack);
if (err)
goto err_unbind;
}
nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
genlmsg_end(rsp, hdr);
err = genlmsg_reply(rsp, info);
if (err)
goto err_unbind;
netdev_unlock(netdev);
mutex_unlock(&priv->lock);
return 0;
err_unbind:
net_devmem_unbind_dmabuf(binding);
err_unlock:
netdev_unlock(netdev);
err_unlock_sock:
mutex_unlock(&priv->lock);
err_genlmsg_free:
nlmsg_free(rsp);
return err;
}
int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net_devmem_dmabuf_binding *binding;
struct netdev_nl_sock *priv;
struct net_device *netdev;
u32 ifindex, dmabuf_fd;
struct sk_buff *rsp;
int err = 0;
void *hdr;
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD))
return -EINVAL;
ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
if (IS_ERR(priv))
return PTR_ERR(priv);
rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!rsp)
return -ENOMEM;
hdr = genlmsg_iput(rsp, info);
if (!hdr) {
err = -EMSGSIZE;
goto err_genlmsg_free;
}
mutex_lock(&priv->lock);
netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
if (!netdev) {
err = -ENODEV;
goto err_unlock_sock;
}
if (!netif_device_present(netdev)) {
err = -ENODEV;
goto err_unlock_netdev;
}
if (!netdev->netmem_tx) {
err = -EOPNOTSUPP;
NL_SET_ERR_MSG(info->extack,
"Driver does not support netmem TX");
goto err_unlock_netdev;
}
binding = net_devmem_bind_dmabuf(netdev, DMA_TO_DEVICE, dmabuf_fd, priv,
info->extack);
if (IS_ERR(binding)) {
err = PTR_ERR(binding);
goto err_unlock_netdev;
}
nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
genlmsg_end(rsp, hdr);
netdev_unlock(netdev);
mutex_unlock(&priv->lock);
return genlmsg_reply(rsp, info);
err_unlock_netdev:
netdev_unlock(netdev);
err_unlock_sock:
mutex_unlock(&priv->lock);
err_genlmsg_free:
nlmsg_free(rsp);
return err;
}
void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
{
INIT_LIST_HEAD(&priv->bindings);
mutex_init(&priv->lock);
}
void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv)
{
struct net_devmem_dmabuf_binding *binding;
struct net_devmem_dmabuf_binding *temp;
netdevice_tracker dev_tracker;
struct net_device *dev;
mutex_lock(&priv->lock);
list_for_each_entry_safe(binding, temp, &priv->bindings, list) {
mutex_lock(&binding->lock);
dev = binding->dev;
if (!dev) {
mutex_unlock(&binding->lock);
net_devmem_unbind_dmabuf(binding);
continue;
}
netdev_hold(dev, &dev_tracker, GFP_KERNEL);
mutex_unlock(&binding->lock);
netdev_lock(dev);
net_devmem_unbind_dmabuf(binding);
netdev_unlock(dev);
netdev_put(dev, &dev_tracker);
}
mutex_unlock(&priv->lock);
}
static int netdev_genl_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
switch (event) {
case NETDEV_REGISTER:
netdev_lock_ops_to_full(netdev);
netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF);
netdev_unlock_full_to_ops(netdev);
break;
case NETDEV_UNREGISTER:
netdev_lock(netdev);
netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF);
netdev_unlock(netdev);
break;
case NETDEV_XDP_FEAT_CHANGE:
netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF);
break;
}
return NOTIFY_OK;
}
static struct notifier_block netdev_genl_nb = {
.notifier_call = netdev_genl_netdevice_event,
};
static int __init netdev_genl_init(void)
{
int err;
err = register_netdevice_notifier(&netdev_genl_nb);
if (err)
return err;
err = genl_register_family(&netdev_nl_family);
if (err)
goto err_unreg_ntf;
return 0;
err_unreg_ntf:
unregister_netdevice_notifier(&netdev_genl_nb);
return err;
}
subsys_initcall(netdev_genl_init);