summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/devlink.c719
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/flow_dissector.c4
-rw-r--r--net/core/netpoll.c4
-rw-r--r--net/core/page_pool.c28
-rw-r--r--net/core/pktgen.c38
-rw-r--r--net/core/rtnetlink.c42
-rw-r--r--net/core/skbuff.c20
-rw-r--r--net/core/sock.c113
9 files changed, 875 insertions, 95 deletions
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4eb969518ee0..566ddd147633 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -190,6 +190,80 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
return devlink_port_get_from_attrs(devlink, info->attrs);
}
+static inline bool
+devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
+}
+
+static inline bool
+devlink_rate_is_node(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
+}
+
+static struct devlink_rate *
+devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ struct devlink_rate *devlink_rate;
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
+ if (IS_ERR(devlink_port))
+ return ERR_CAST(devlink_port);
+ devlink_rate = devlink_port->devlink_rate;
+ return devlink_rate ?: ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
+{
+ static struct devlink_rate *devlink_rate;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ if (devlink_rate_is_node(devlink_rate) &&
+ !strcmp(node_name, devlink_rate->name))
+ return devlink_rate;
+ }
+ return ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ const char *rate_node_name;
+ size_t len;
+
+ if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+ return ERR_PTR(-EINVAL);
+ rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
+ len = strlen(rate_node_name);
+ /* Name cannot be empty or decimal number */
+ if (!len || strspn(rate_node_name, "0123456789") == len)
+ return ERR_PTR(-EINVAL);
+
+ return devlink_rate_node_get_by_name(devlink, rate_node_name);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_rate_node_get_from_attrs(devlink, info->attrs);
+}
+
+static struct devlink_rate *
+devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ struct nlattr **attrs = info->attrs;
+
+ if (attrs[DEVLINK_ATTR_PORT_INDEX])
+ return devlink_rate_leaf_get_from_info(devlink, info);
+ else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+ return devlink_rate_node_get_from_info(devlink, info);
+ else
+ return ERR_PTR(-EINVAL);
+}
+
struct devlink_sb {
struct list_head list;
unsigned int index;
@@ -408,12 +482,14 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
+#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(4)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
@@ -442,6 +518,24 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
devlink_port = devlink_port_get_from_info(devlink, info);
if (!IS_ERR(devlink_port))
info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
+ struct devlink_rate *devlink_rate;
+
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate)) {
+ err = PTR_ERR(devlink_rate);
+ goto unlock;
+ }
+ info->user_ptr[1] = devlink_rate;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
+ struct devlink_rate *rate_node;
+
+ rate_node = devlink_rate_node_get_from_info(devlink, info);
+ if (IS_ERR(rate_node)) {
+ err = PTR_ERR(rate_node);
+ goto unlock;
+ }
+ info->user_ptr[1] = rate_node;
}
return 0;
@@ -705,7 +799,6 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
- case DEVLINK_PORT_FLAVOUR_VIRTUAL:
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
attrs->phys.port_number))
return -EMSGSIZE;
@@ -749,6 +842,56 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
return 0;
}
+static int devlink_nl_rate_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_rate *devlink_rate,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
+ goto nla_put_failure;
+
+ if (devlink_rate_is_leaf(devlink_rate)) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ devlink_rate->devlink_port->index))
+ goto nla_put_failure;
+ } else if (devlink_rate_is_node(devlink_rate)) {
+ if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
+ devlink_rate->name))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
+ devlink_rate->tx_share, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
+ devlink_rate->tx_max, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (devlink_rate->parent)
+ if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
+ devlink_rate->parent->name))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
static bool
devlink_port_fn_state_valid(enum devlink_port_fn_state state)
{
@@ -920,6 +1063,111 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
+static void devlink_rate_notify(struct devlink_rate *devlink_rate,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_RATE_NEW &&
+ cmd != DEVLINK_CMD_RATE_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
+ cmd, 0, 0, 0, NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_rate *devlink_rate;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err = 0;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+ u32 id = NETLINK_CB(cb->skb).portid;
+
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_rate_fill(msg, devlink,
+ devlink_rate,
+ cmd, id,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, NULL);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_rate *devlink_rate = info->user_ptr[1];
+ struct devlink *devlink = devlink_rate->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
+ DEVLINK_CMD_RATE_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static bool
+devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent)
+{
+ while (parent) {
+ if (parent == devlink_rate)
+ return true;
+ parent = parent->parent;
+ }
+ return false;
+}
+
static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
@@ -1340,6 +1588,255 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
return devlink->ops->port_del(devlink, port_index, extack);
}
+static int
+devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
+ struct genl_info *info,
+ struct nlattr *nla_parent)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ const char *parent_name = nla_data(nla_parent);
+ const struct devlink_ops *ops = devlink->ops;
+ size_t len = strlen(parent_name);
+ struct devlink_rate *parent;
+ int err = -EOPNOTSUPP;
+
+ parent = devlink_rate->parent;
+ if (parent && len) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Rate object already has parent.");
+ return -EBUSY;
+ } else if (parent && !len) {
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_parent_set(devlink_rate, NULL,
+ devlink_rate->priv, NULL,
+ info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_parent_set(devlink_rate, NULL,
+ devlink_rate->priv, NULL,
+ info->extack);
+ if (err)
+ return err;
+
+ refcount_dec(&parent->refcnt);
+ devlink_rate->parent = NULL;
+ } else if (!parent && len) {
+ parent = devlink_rate_node_get_by_name(devlink, parent_name);
+ if (IS_ERR(parent))
+ return -ENODEV;
+
+ if (parent == devlink_rate) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed");
+ return -EINVAL;
+ }
+
+ if (devlink_rate_is_node(devlink_rate) &&
+ devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node.");
+ return -EEXIST;
+ }
+
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_parent_set(devlink_rate, parent,
+ devlink_rate->priv, parent->priv,
+ info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_parent_set(devlink_rate, parent,
+ devlink_rate->priv, parent->priv,
+ info->extack);
+ if (err)
+ return err;
+
+ refcount_inc(&parent->refcnt);
+ devlink_rate->parent = parent;
+ }
+
+ return 0;
+}
+
+static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
+ const struct devlink_ops *ops,
+ struct genl_info *info)
+{
+ struct nlattr *nla_parent, **attrs = info->attrs;
+ int err = -EOPNOTSUPP;
+ u64 rate;
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ if (err)
+ return err;
+ devlink_rate->tx_share = rate;
+ }
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ if (err)
+ return err;
+ devlink_rate->tx_max = rate;
+ }
+
+ nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
+ if (nla_parent) {
+ err = devlink_nl_rate_parent_node_set(devlink_rate, info,
+ nla_parent);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
+ struct genl_info *info,
+ enum devlink_rate_type type)
+{
+ struct nlattr **attrs = info->attrs;
+
+ if (type == DEVLINK_RATE_TYPE_LEAF) {
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+ !ops->rate_leaf_parent_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs");
+ return false;
+ }
+ } else if (type == DEVLINK_RATE_TYPE_NODE) {
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+ !ops->rate_node_parent_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes");
+ return false;
+ }
+ } else {
+ WARN(1, "Unknown type of rate object");
+ return false;
+ }
+
+ return true;
+}
+
+static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_rate *devlink_rate = info->user_ptr[1];
+ struct devlink *devlink = devlink_rate->devlink;
+ const struct devlink_ops *ops = devlink->ops;
+ int err;
+
+ if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
+ return -EOPNOTSUPP;
+
+ err = devlink_nl_rate_set(devlink_rate, ops, info);
+
+ if (!err)
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+ return err;
+}
+
+static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *rate_node;
+ const struct devlink_ops *ops;
+ int err;
+
+ ops = devlink->ops;
+ if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
+ return -EOPNOTSUPP;
+
+ rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
+ if (!IS_ERR(rate_node))
+ return -EEXIST;
+ else if (rate_node == ERR_PTR(-EINVAL))
+ return -EINVAL;
+
+ rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+ if (!rate_node)
+ return -ENOMEM;
+
+ rate_node->devlink = devlink;
+ rate_node->type = DEVLINK_RATE_TYPE_NODE;
+ rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
+ if (!rate_node->name) {
+ err = -ENOMEM;
+ goto err_strdup;
+ }
+
+ err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
+ if (err)
+ goto err_node_new;
+
+ err = devlink_nl_rate_set(rate_node, ops, info);
+ if (err)
+ goto err_rate_set;
+
+ refcount_set(&rate_node->refcnt, 1);
+ list_add(&rate_node->list, &devlink->rate_list);
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ return 0;
+
+err_rate_set:
+ ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+err_node_new:
+ kfree(rate_node->name);
+err_strdup:
+ kfree(rate_node);
+ return err;
+}
+
+static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_rate *rate_node = info->user_ptr[1];
+ struct devlink *devlink = rate_node->devlink;
+ const struct devlink_ops *ops = devlink->ops;
+ int err;
+
+ if (refcount_read(&rate_node->refcnt) > 1) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node.");
+ return -EBUSY;
+ }
+
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+ err = ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+ if (rate_node->parent)
+ refcount_dec(&rate_node->parent->refcnt);
+ list_del(&rate_node->list);
+ kfree(rate_node->name);
+ kfree(rate_node);
+ return err;
+}
+
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
enum devlink_command cmd, u32 portid,
@@ -2208,6 +2705,30 @@ static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
+static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_rate *devlink_rate;
+ u16 old_mode;
+ int err;
+
+ if (!devlink->ops->eswitch_mode_get)
+ return -EOPNOTSUPP;
+ err = devlink->ops->eswitch_mode_get(devlink, &old_mode);
+ if (err)
+ return err;
+
+ if (old_mode == mode)
+ return 0;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list)
+ if (devlink_rate_is_node(devlink_rate)) {
+ NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
+ return -EBUSY;
+ }
+ return 0;
+}
+
static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -2222,6 +2743,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
if (!ops->eswitch_mode_set)
return -EOPNOTSUPP;
mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+ err = devlink_rate_nodes_check(devlink, mode, info->extack);
+ if (err)
+ return err;
err = ops->eswitch_mode_set(devlink, mode, info->extack);
if (err)
return err;
@@ -6995,8 +7519,9 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
}
}
-static int devlink_trap_stats_put(struct sk_buff *msg,
- struct devlink_stats __percpu *trap_stats)
+static int
+devlink_trap_group_stats_put(struct sk_buff *msg,
+ struct devlink_stats __percpu *trap_stats)
{
struct devlink_stats stats;
struct nlattr *attr;
@@ -7024,6 +7549,50 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+ u64 drops = 0;
+ int err;
+
+ if (devlink->ops->trap_drop_counter_get) {
+ err = devlink->ops->trap_drop_counter_get(devlink,
+ trap_item->trap,
+ &drops);
+ if (err)
+ return err;
+ }
+
+ devlink_trap_stats_read(trap_item->stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (devlink->ops->trap_drop_counter_get &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ stats.rx_packets, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ stats.rx_bytes, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
const struct devlink_trap_item *trap_item,
enum devlink_command cmd, u32 portid, u32 seq,
@@ -7061,7 +7630,7 @@ static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
if (err)
goto nla_put_failure;
- err = devlink_trap_stats_put(msg, trap_item->stats);
+ err = devlink_trap_stats_put(msg, devlink, trap_item);
if (err)
goto nla_put_failure;
@@ -7278,7 +7847,7 @@ devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
group_item->policer_item->policer->id))
goto nla_put_failure;
- err = devlink_trap_stats_put(msg, group_item->stats);
+ err = devlink_trap_group_stats_put(msg, group_item->stats);
if (err)
goto nla_put_failure;
@@ -7802,6 +8371,11 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
[DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -7828,6 +8402,30 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .doit = devlink_nl_cmd_rate_get_doit,
+ .dumpit = devlink_nl_cmd_rate_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_SET,
+ .doit = devlink_nl_cmd_rate_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_NEW,
+ .doit = devlink_nl_cmd_rate_new_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_DEL,
+ .doit = devlink_nl_cmd_rate_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE,
+ },
+ {
.cmd = DEVLINK_CMD_PORT_SPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_split_doit,
@@ -8202,6 +8800,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
__devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
+ INIT_LIST_HEAD(&devlink->rate_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
INIT_LIST_HEAD(&devlink->resource_list);
@@ -8304,6 +8903,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->resource_list));
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->rate_list));
WARN_ON(!list_empty(&devlink->port_list));
xa_destroy(&devlink->snapshot_ids);
@@ -8620,6 +9220,108 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+/**
+ * devlink_rate_leaf_create - create devlink rate leaf
+ *
+ * @devlink_port: devlink port object to create rate object on
+ * @priv: driver private data
+ *
+ * Create devlink rate object of type leaf on provided @devlink_port.
+ * Throws call trace if @devlink_port already has a devlink rate object.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ *
+ * Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
+ */
+int
+devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_rate *devlink_rate;
+
+ devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
+ if (!devlink_rate)
+ return -ENOMEM;
+
+ mutex_lock(&devlink->lock);
+ WARN_ON(devlink_port->devlink_rate);
+ devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
+ devlink_rate->devlink = devlink;
+ devlink_rate->devlink_port = devlink_port;
+ devlink_rate->priv = priv;
+ list_add_tail(&devlink_rate->list, &devlink->rate_list);
+ devlink_port->devlink_rate = devlink_rate;
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+ mutex_unlock(&devlink->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
+
+/**
+ * devlink_rate_leaf_destroy - destroy devlink rate leaf
+ *
+ * @devlink_port: devlink port linked to the rate object
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
+{
+ struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
+ struct devlink *devlink = devlink_port->devlink;
+
+ if (!devlink_rate)
+ return;
+
+ mutex_lock(&devlink->lock);
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+ list_del(&devlink_rate->list);
+ devlink_port->devlink_rate = NULL;
+ mutex_unlock(&devlink->lock);
+ kfree(devlink_rate);
+}
+EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
+
+/**
+ * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
+ *
+ * @devlink: devlink instance
+ *
+ * Unset parent for all rate objects and destroy all rate nodes
+ * on specified device.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_rate_nodes_destroy(struct devlink *devlink)
+{
+ static struct devlink_rate *devlink_rate, *tmp;
+ const struct devlink_ops *ops = devlink->ops;
+
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ if (!devlink_rate->parent)
+ continue;
+
+ refcount_dec(&devlink_rate->parent->refcnt);
+ if (devlink_rate_is_leaf(devlink_rate))
+ ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
+ NULL, NULL);
+ else if (devlink_rate_is_node(devlink_rate))
+ ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
+ NULL, NULL);
+ }
+ list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
+ if (devlink_rate_is_node(devlink_rate)) {
+ ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
+ list_del(&devlink_rate->list);
+ kfree(devlink_rate->name);
+ kfree(devlink_rate);
+ }
+ }
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
+
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
char *name, size_t len)
{
@@ -8632,12 +9334,17 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+ n = snprintf(name, len, "p%u", attrs->phys.port_number);
+ if (n < len && attrs->split)
+ n += snprintf(name + n, len - n, "s%u",
+ attrs->phys.split_subport_number);
if (!attrs->split)
n = snprintf(name, len, "p%u", attrs->phys.port_number);
else
n = snprintf(name, len, "p%us%u",
attrs->phys.port_number,
attrs->phys.split_subport_number);
+
break;
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index cd80ffed6d26..a9f937975080 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1168,7 +1168,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
{
struct net *net;
struct sk_buff *skb;
- int err = -ENOBUFS;
+ int err = -ENOMEM;
net = ops->fro_net;
skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3ed7c98a98e1..2aadbfc5193b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -943,8 +943,8 @@ bool __skb_flow_dissect(const struct net *net,
int offset = 0;
ops = skb->dev->dsa_ptr->tag_ops;
- /* Tail taggers don't break flow dissection */
- if (!ops->tail_tag) {
+ /* Only DSA header taggers break flow dissection */
+ if (ops->needed_headroom) {
if (ops->flow_dissect)
ops->flow_dissect(skb, &proto, &offset);
else
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c310c7c1cef7..0a6b04714558 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -36,6 +36,7 @@
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>
+#include <linux/kconfig.h>
/*
* We maintain a small pool of fully-sized skbs, to make sure the
@@ -389,7 +390,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
static atomic_t ip_ident;
struct ipv6hdr *ip6h;
- WARN_ON_ONCE(!irqs_disabled());
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ WARN_ON_ONCE(!irqs_disabled());
udp_len = len + sizeof(*udph);
if (np->ipv6)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 3c4c4c7a0402..5e4eb45b139c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -17,6 +17,7 @@
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h> /* for __put_page() */
+#include <linux/poison.h>
#include <trace/events/page_pool.h>
@@ -221,6 +222,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
return NULL;
}
+ page->pp_magic |= PP_SIGNATURE;
+
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
@@ -263,6 +266,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
put_page(page);
continue;
}
+ page->pp_magic |= PP_SIGNATURE;
pool->alloc.cache[pool->alloc.count++] = page;
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
@@ -341,6 +345,8 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
DMA_ATTR_SKIP_CPU_SYNC);
page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
+ page->pp_magic = 0;
+
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
*/
@@ -622,3 +628,25 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
+
+bool page_pool_return_skb_page(struct page *page)
+{
+ struct page_pool *pp;
+
+ page = compound_head(page);
+ if (unlikely(page->pp_magic != PP_SIGNATURE))
+ return false;
+
+ pp = page->pp;
+
+ /* Driver set this to memory recycling info. Reset it on recycle.
+ * This will *not* work for NIC using a split-page memory model.
+ * The page will be returned to the pool here regardless of the
+ * 'flipped' fragment being in use or not.
+ */
+ page->pp = NULL;
+ page_pool_put_full_page(pp, page, false);
+
+ return true;
+}
+EXPORT_SYMBOL(page_pool_return_skb_page);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3fba429f1f57..7e258d255e90 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -467,7 +467,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
static void pktgen_run_all_threads(struct pktgen_net *pn);
static void pktgen_reset_all_threads(struct pktgen_net *pn);
-static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn);
+static void pktgen_stop_all_threads(struct pktgen_net *pn);
static void pktgen_stop(struct pktgen_thread *t);
static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
@@ -516,14 +516,11 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
data[count - 1] = 0; /* Strip trailing '\n' and terminate string */
if (!strcmp(data, "stop"))
- pktgen_stop_all_threads_ifs(pn);
-
+ pktgen_stop_all_threads(pn);
else if (!strcmp(data, "start"))
pktgen_run_all_threads(pn);
-
else if (!strcmp(data, "reset"))
pktgen_reset_all_threads(pn);
-
else
return -EINVAL;
@@ -3027,20 +3024,25 @@ static void pktgen_run(struct pktgen_thread *t)
t->control &= ~(T_STOP);
}
-static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn)
+static void pktgen_handle_all_threads(struct pktgen_net *pn, u32 flags)
{
struct pktgen_thread *t;
- func_enter();
-
mutex_lock(&pktgen_thread_lock);
list_for_each_entry(t, &pn->pktgen_threads, th_list)
- t->control |= T_STOP;
+ t->control |= (flags);
mutex_unlock(&pktgen_thread_lock);
}
+static void pktgen_stop_all_threads(struct pktgen_net *pn)
+{
+ func_enter();
+
+ pktgen_handle_all_threads(pn, T_STOP);
+}
+
static int thread_is_running(const struct pktgen_thread *t)
{
const struct pktgen_dev *pkt_dev;
@@ -3103,16 +3105,9 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
static void pktgen_run_all_threads(struct pktgen_net *pn)
{
- struct pktgen_thread *t;
-
func_enter();
- mutex_lock(&pktgen_thread_lock);
-
- list_for_each_entry(t, &pn->pktgen_threads, th_list)
- t->control |= (T_RUN);
-
- mutex_unlock(&pktgen_thread_lock);
+ pktgen_handle_all_threads(pn, T_RUN);
/* Propagate thread->control */
schedule_timeout_interruptible(msecs_to_jiffies(125));
@@ -3122,16 +3117,9 @@ static void pktgen_run_all_threads(struct pktgen_net *pn)
static void pktgen_reset_all_threads(struct pktgen_net *pn)
{
- struct pktgen_thread *t;
-
func_enter();
- mutex_lock(&pktgen_thread_lock);
-
- list_for_each_entry(t, &pn->pktgen_threads, th_list)
- t->control |= (T_REMDEVALL);
-
- mutex_unlock(&pktgen_thread_lock);
+ pktgen_handle_all_threads(pn, T_REMDEVALL);
/* Propagate thread->control */
schedule_timeout_interruptible(msecs_to_jiffies(125));
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 04b4f0f2a3d2..5baa86bca876 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -9,7 +9,7 @@
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Fixes:
- * Vitaly E. Lavrov RTA_OK arithmetics was wrong.
+ * Vitaly E. Lavrov RTA_OK arithmetic was wrong.
*/
#include <linux/bitops.h>
@@ -234,7 +234,7 @@ unlock:
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
*
* Like rtnl_register, but for use by removable modules.
*/
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(rtnl_register_module);
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
*
* Registers the specified function pointers (at least one of them has
* to be non-NULL) to be called whenever a request message for the
@@ -376,12 +376,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
if (rtnl_link_ops_get(ops->kind))
return -EEXIST;
- /* The check for setup is here because if ops
+ /* The check for alloc/setup is here because if ops
* does not have that filled up, it is not possible
* to use the ops for creating device. So do not
* fill up dellink as well. That disables rtnl_dellink.
*/
- if (ops->setup && !ops->dellink)
+ if ((ops->alloc || ops->setup) && !ops->dellink)
ops->dellink = unregister_netdevice_queue;
list_add_tail(&ops->list, &link_ops);
@@ -1821,6 +1821,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (rtnl_fill_prop_list(skb, dev))
goto nla_put_failure;
+ if (dev->dev.parent &&
+ nla_put_string(skb, IFLA_PARENT_DEV_NAME,
+ dev_name(dev->dev.parent)))
+ goto nla_put_failure;
+
+ if (dev->dev.parent && dev->dev.parent->bus &&
+ nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME,
+ dev->dev.parent->bus->name))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -1880,6 +1890,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
[IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
+ [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2567,7 +2578,7 @@ static int do_set_proto_down(struct net_device *dev,
if (nl_proto_down) {
proto_down = nla_get_u8(nl_proto_down);
- /* Dont turn off protodown if there are active reasons */
+ /* Don't turn off protodown if there are active reasons */
if (!proto_down && dev->proto_down_reason) {
NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons");
return -EBUSY;
@@ -3165,8 +3176,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
return ERR_PTR(-EINVAL);
}
- dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
- ops->setup, num_tx_queues, num_rx_queues);
+ if (ops->alloc) {
+ dev = ops->alloc(tb, ifname, name_assign_type,
+ num_tx_queues, num_rx_queues);
+ if (IS_ERR(dev))
+ return dev;
+ } else {
+ dev = alloc_netdev_mqs(ops->priv_size, ifname,
+ name_assign_type, ops->setup,
+ num_tx_queues, num_rx_queues);
+ }
+
if (!dev)
return ERR_PTR(-ENOMEM);
@@ -3399,7 +3419,7 @@ replay:
return -EOPNOTSUPP;
}
- if (!ops->setup)
+ if (!ops->alloc && !ops->setup)
return -EOPNOTSUPP;
if (!ifname[0]) {
@@ -4830,8 +4850,10 @@ static int rtnl_bridge_notify(struct net_device *dev)
if (err < 0)
goto errout;
- if (!skb->len)
+ if (!skb->len) {
+ err = -EINVAL;
goto errout;
+ }
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
return 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ad22870298c..a0b1d4847efe 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/mpls.h>
#include <net/mptcp.h>
+#include <net/page_pool.h>
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -645,10 +646,13 @@ static void skb_free_head(struct sk_buff *skb)
{
unsigned char *head = skb->head;
- if (skb->head_frag)
+ if (skb->head_frag) {
+ if (skb_pp_recycle(skb, head))
+ return;
skb_free_frag(head);
- else
+ } else {
kfree(head);
+ }
}
static void skb_release_data(struct sk_buff *skb)
@@ -664,7 +668,7 @@ static void skb_release_data(struct sk_buff *skb)
skb_zcopy_clear(skb, true);
for (i = 0; i < shinfo->nr_frags; i++)
- __skb_frag_unref(&shinfo->frags[i]);
+ __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);
@@ -1046,6 +1050,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->nohdr = 0;
n->peeked = 0;
C(pfmemalloc);
+ C(pp_recycle);
n->destructor = NULL;
C(tail);
C(end);
@@ -3495,7 +3500,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragto = &skb_shinfo(tgt)->frags[merge];
skb_frag_size_add(fragto, skb_frag_size(fragfrom));
- __skb_frag_unref(fragfrom);
+ __skb_frag_unref(fragfrom, skb->pp_recycle);
}
/* Reposition in the original skb */
@@ -5285,6 +5290,13 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_cloned(to))
return false;
+ /* The page pool signature of struct page will eventually figure out
+ * which pages can be recycled or not but for now let's prohibit slab
+ * allocated and page_pool allocated SKBs from being coalesced.
+ */
+ if (to->pp_recycle != from->pp_recycle)
+ return false;
+
if (len <= skb_tailroom(to)) {
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
diff --git a/net/core/sock.c b/net/core/sock.c
index 958614ea16ed..ddfa88082a2b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -776,6 +776,58 @@ void sock_enable_timestamps(struct sock *sk)
}
EXPORT_SYMBOL(sock_enable_timestamps);
+void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
+{
+ switch (optname) {
+ case SO_TIMESTAMP_OLD:
+ __sock_set_timestamps(sk, valbool, false, false);
+ break;
+ case SO_TIMESTAMP_NEW:
+ __sock_set_timestamps(sk, valbool, true, false);
+ break;
+ case SO_TIMESTAMPNS_OLD:
+ __sock_set_timestamps(sk, valbool, false, true);
+ break;
+ case SO_TIMESTAMPNS_NEW:
+ __sock_set_timestamps(sk, valbool, true, true);
+ break;
+ }
+}
+
+int sock_set_timestamping(struct sock *sk, int optname, int val)
+{
+ if (val & ~SOF_TIMESTAMPING_MASK)
+ return -EINVAL;
+
+ if (val & SOF_TIMESTAMPING_OPT_ID &&
+ !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM) {
+ if ((1 << sk->sk_state) &
+ (TCPF_CLOSE | TCPF_LISTEN))
+ return -EINVAL;
+ sk->sk_tskey = tcp_sk(sk)->snd_una;
+ } else {
+ sk->sk_tskey = 0;
+ }
+ }
+
+ if (val & SOF_TIMESTAMPING_OPT_STATS &&
+ !(val & SOF_TIMESTAMPING_OPT_TSONLY))
+ return -EINVAL;
+
+ sk->sk_tsflags = val;
+ sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
+ if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+ sock_enable_timestamp(sk,
+ SOCK_TIMESTAMPING_RX_SOFTWARE);
+ else
+ sock_disable_timestamp(sk,
+ (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+ return 0;
+}
+
void sock_set_keepalive(struct sock *sk)
{
lock_sock(sk);
@@ -815,10 +867,18 @@ void sock_set_rcvbuf(struct sock *sk, int val)
}
EXPORT_SYMBOL(sock_set_rcvbuf);
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+ if (val != sk->sk_mark) {
+ sk->sk_mark = val;
+ sk_dst_reset(sk);
+ }
+}
+
void sock_set_mark(struct sock *sk, u32 val)
{
lock_sock(sk);
- sk->sk_mark = val;
+ __sock_set_mark(sk, val);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_mark);
@@ -989,54 +1049,15 @@ set_sndbuf:
break;
case SO_TIMESTAMP_OLD:
- __sock_set_timestamps(sk, valbool, false, false);
- break;
case SO_TIMESTAMP_NEW:
- __sock_set_timestamps(sk, valbool, true, false);
- break;
case SO_TIMESTAMPNS_OLD:
- __sock_set_timestamps(sk, valbool, false, true);
- break;
case SO_TIMESTAMPNS_NEW:
- __sock_set_timestamps(sk, valbool, true, true);
+ sock_set_timestamp(sk, valbool, optname);
break;
+
case SO_TIMESTAMPING_NEW:
case SO_TIMESTAMPING_OLD:
- if (val & ~SOF_TIMESTAMPING_MASK) {
- ret = -EINVAL;
- break;
- }
-
- if (val & SOF_TIMESTAMPING_OPT_ID &&
- !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
- if (sk->sk_protocol == IPPROTO_TCP &&
- sk->sk_type == SOCK_STREAM) {
- if ((1 << sk->sk_state) &
- (TCPF_CLOSE | TCPF_LISTEN)) {
- ret = -EINVAL;
- break;
- }
- sk->sk_tskey = tcp_sk(sk)->snd_una;
- } else {
- sk->sk_tskey = 0;
- }
- }
-
- if (val & SOF_TIMESTAMPING_OPT_STATS &&
- !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
- ret = -EINVAL;
- break;
- }
-
- sk->sk_tsflags = val;
- sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
-
- if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
- sock_enable_timestamp(sk,
- SOCK_TIMESTAMPING_RX_SOFTWARE);
- else
- sock_disable_timestamp(sk,
- (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+ ret = sock_set_timestamping(sk, optname, val);
break;
case SO_RCVLOWAT:
@@ -1126,10 +1147,10 @@ set_sndbuf:
case SO_MARK:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
- } else if (val != sk->sk_mark) {
- sk->sk_mark = val;
- sk_dst_reset(sk);
+ break;
}
+
+ __sock_set_mark(sk, val);
break;
case SO_RXQ_OVFL: