diff options
124 files changed, 11781 insertions, 1117 deletions
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c index 52377b4bf039..a2e0ed6c9a4d 100644 --- a/drivers/isdn/hardware/eicon/divasmain.c +++ b/drivers/isdn/hardware/eicon/divasmain.c @@ -481,7 +481,7 @@ void __inline__ outpp(void __iomem *addr, word p) int diva_os_register_irq(void *context, byte irq, const char *name) { int result = request_irq(irq, diva_os_irq_wrapper, - IRQF_DISABLED | IRQF_SHARED, name, context); + IRQF_SHARED, name, context); return (result); } diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c index ca997bd4e818..92acc81f844d 100644 --- a/drivers/isdn/sc/init.c +++ b/drivers/isdn/sc/init.c @@ -336,7 +336,7 @@ static int __init sc_init(void) */ sc_adapter[cinst]->interrupt = irq[b]; if (request_irq(sc_adapter[cinst]->interrupt, interrupt_handler, - IRQF_DISABLED, interface->id, + 0, interface->id, (void *)(unsigned long) cinst)) { kfree(sc_adapter[cinst]->channel); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index ea3e64e22e22..187b1b7772ef 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2344,7 +2344,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, struct slave *slave; struct port *port; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave).port); if (port->aggregator && port->aggregator->is_active) { aggregator = port->aggregator; @@ -2369,9 +2369,9 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { int ret; - read_lock(&bond->lock); + rcu_read_lock(); ret = __bond_3ad_get_active_agg_info(bond, ad_info); - read_unlock(&bond->lock); + rcu_read_unlock(); return ret; } @@ -2388,7 +2388,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) int res = 1; int agg_id; - read_lock(&bond->lock); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n", dev->name); @@ -2406,7 +2405,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg); first_ok_slave = NULL; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { agg = SLAVE_AD_INFO(slave).port.aggregator; if (!agg || agg->aggregator_identifier != agg_id) continue; @@ -2436,7 +2435,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) res = bond_dev_queue_xmit(bond, skb, first_ok_slave->dev); out: - read_unlock(&bond->lock); if (res) { /* no suitable interface, frame not sent */ kfree_skb(skb); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 576cceae026a..02872405d35d 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -230,7 +230,7 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) max_gap = LLONG_MIN; /* Find the slave with the largest gap */ - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { if (SLAVE_IS_OK(slave)) { long long gap = compute_gap(slave); @@ -412,6 +412,39 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond) return rx_slave; } +/* Caller must hold rcu_read_lock() for read */ +static struct slave *__rlb_next_rx_slave(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *before = NULL, *rx_slave = NULL, *slave; + struct list_head *iter; + bool found = false; + + bond_for_each_slave_rcu(bond, slave, iter) { + if (!SLAVE_IS_OK(slave)) + continue; + if (!found) { + if (!before || before->speed < slave->speed) + before = slave; + } else { + if (!rx_slave || rx_slave->speed < slave->speed) + rx_slave = slave; + } + if (slave == bond_info->rx_slave) + found = true; + } + /* we didn't find anything after the current or we have something + * better before and up to the current slave + */ + if (!rx_slave || (before && rx_slave->speed < before->speed)) + rx_slave = before; + + if (rx_slave) + bond_info->rx_slave = rx_slave; + + return rx_slave; +} + /* teach the switch the mac of a disabled slave * on the primary for fault tolerance * @@ -628,12 +661,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct arp_pkt *arp = arp_pkt(skb); - struct slave *assigned_slave; + struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; _lock_rx_hashtbl(bond); + curr_active_slave = rcu_dereference(bond->curr_active_slave); + hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); client_info = &(bond_info->rx_hashtbl[hash_index]); @@ -658,14 +693,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon * that the new client can be assigned to this entry. */ if (bond->curr_active_slave && - client_info->slave != bond->curr_active_slave) { - client_info->slave = bond->curr_active_slave; + client_info->slave != curr_active_slave) { + client_info->slave = curr_active_slave; rlb_update_client(client_info); } } } /* assign a new slave */ - assigned_slave = rlb_next_rx_slave(bond); + assigned_slave = __rlb_next_rx_slave(bond); if (assigned_slave) { if (!(client_info->assigned && @@ -728,7 +763,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). */ - if (!bond_slave_has_mac(bond, arp->mac_src)) + if (!bond_slave_has_mac_rcu(bond, arp->mac_src)) return NULL; if (arp->op_code == htons(ARPOP_REPLY)) { @@ -1343,11 +1378,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) skb_reset_mac_header(skb); eth_data = eth_hdr(skb); - /* make sure that the curr_active_slave do not change during tx - */ - read_lock(&bond->lock); - read_lock(&bond->curr_slave_lock); - switch (ntohs(skb->protocol)) { case ETH_P_IP: { const struct iphdr *iph = ip_hdr(skb); @@ -1429,12 +1459,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) if (!tx_slave) { /* unbalanced or unassigned, send through primary */ - tx_slave = bond->curr_active_slave; + tx_slave = rcu_dereference(bond->curr_active_slave); bond_info->unbalanced_load += skb->len; } if (tx_slave && SLAVE_IS_OK(tx_slave)) { - if (tx_slave != bond->curr_active_slave) { + if (tx_slave != rcu_dereference(bond->curr_active_slave)) { memcpy(eth_data->h_source, tx_slave->dev->dev_addr, ETH_ALEN); @@ -1449,8 +1479,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) } } - read_unlock(&bond->curr_slave_lock); - read_unlock(&bond->lock); if (res) { /* no suitable interface, frame not sent */ kfree_skb(skb); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index e9249527e7e7..03bed0ca935e 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -179,7 +179,9 @@ static ssize_t bonding_show_slaves(struct device *d, struct slave *slave; int res = 0; - read_lock(&bond->lock); + if (!rtnl_trylock()) + return restart_syscall(); + bond_for_each_slave(bond, slave, iter) { if (res > (PAGE_SIZE - IFNAMSIZ)) { /* not enough space for another interface name */ @@ -190,7 +192,9 @@ static ssize_t bonding_show_slaves(struct device *d, } res += sprintf(buf + res, "%s ", slave->dev->name); } - read_unlock(&bond->lock); + + rtnl_unlock(); + if (res) buf[res-1] = '\n'; /* eat the leftover space */ @@ -626,6 +630,9 @@ static ssize_t bonding_store_arp_targets(struct device *d, unsigned long *targets_rx; int ind, i, j, ret = -EINVAL; + if (!rtnl_trylock()) + return restart_syscall(); + targets = bond->params.arp_targets; newtarget = in_aton(buf + 1); /* look for adds */ @@ -699,6 +706,7 @@ static ssize_t bonding_store_arp_targets(struct device *d, ret = count; out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets); @@ -1467,7 +1475,6 @@ static ssize_t bonding_show_queue_id(struct device *d, if (!rtnl_trylock()) return restart_syscall(); - read_lock(&bond->lock); bond_for_each_slave(bond, slave, iter) { if (res > (PAGE_SIZE - IFNAMSIZ - 6)) { /* not enough space for another interface_name:queue_id pair */ @@ -1479,9 +1486,9 @@ static ssize_t bonding_show_queue_id(struct device *d, res += sprintf(buf + res, "%s:%d ", slave->dev->name, slave->queue_id); } - read_unlock(&bond->lock); if (res) buf[res-1] = '\n'; /* eat the leftover space */ + rtnl_unlock(); return res; @@ -1530,8 +1537,6 @@ static ssize_t bonding_store_queue_id(struct device *d, if (!sdev) goto err_no_cmd; - read_lock(&bond->lock); - /* Search for thes slave and check for duplicate qids */ update_slave = NULL; bond_for_each_slave(bond, slave, iter) { @@ -1542,23 +1547,20 @@ static ssize_t bonding_store_queue_id(struct device *d, */ update_slave = slave; else if (qid && qid == slave->queue_id) { - goto err_no_cmd_unlock; + goto err_no_cmd; } } if (!update_slave) - goto err_no_cmd_unlock; + goto err_no_cmd; /* Actually set the qids for the slave */ update_slave->queue_id = qid; - read_unlock(&bond->lock); out: rtnl_unlock(); return ret; -err_no_cmd_unlock: - read_unlock(&bond->lock); err_no_cmd: pr_info("invalid input for queue_id set for %s.\n", bond->dev->name); @@ -1591,6 +1593,9 @@ static ssize_t bonding_store_slaves_active(struct device *d, struct list_head *iter; struct slave *slave; + if (!rtnl_trylock()) + return restart_syscall(); + if (sscanf(buf, "%d", &new_value) != 1) { pr_err("%s: no all_slaves_active value specified.\n", bond->dev->name); @@ -1610,7 +1615,6 @@ static ssize_t bonding_store_slaves_active(struct device *d, goto out; } - read_lock(&bond->lock); bond_for_each_slave(bond, slave, iter) { if (!bond_is_active_slave(slave)) { if (new_value) @@ -1619,8 +1623,8 @@ static ssize_t bonding_store_slaves_active(struct device *d, slave->inactive = 1; } } - read_unlock(&bond->lock); out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR, diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 0bd04fbda8e9..bb5c731e2560 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -464,6 +464,20 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, return NULL; } +/* Caller must hold rcu_read_lock() for read */ +static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, + const u8 *mac) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave_rcu(bond, tmp, iter) + if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) + return tmp; + + return NULL; +} + /* Check if the ip is present in arp ip list, or first free slot if ip == 0 * Returns -1 if not found, index if found */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index ea20182c6969..735765c21c95 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2253,7 +2253,6 @@ EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) { struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_vport_oper_state *vf_oper; struct mlx4_vport_state *vf_admin; int slave; @@ -2269,7 +2268,6 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) return -EINVAL; vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; - vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if ((0 == vlan) && (0 == qos)) vf_admin->default_vlan = MLX4_VGT; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fa37b7a61213..85d91665d400 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1733,7 +1733,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Unregister Mac address for the port */ mlx4_en_put_qp(priv); - if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN)) + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN)) mdev->mac_removed[priv->port] = 1; /* Free RX Rings */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 0d63daa2f422..c151e7a6710a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -652,7 +652,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) - dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN; + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) @@ -1713,7 +1713,6 @@ void mlx4_opreq_action(struct work_struct *work) u32 *outbox; u32 modifier; u16 token; - u16 type_m; u16 type; int err; u32 num_qps; @@ -1746,7 +1745,6 @@ void mlx4_opreq_action(struct work_struct *work) MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET); MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET); MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET); - type_m = type >> 12; type &= 0xfff; switch (type) { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 60c9f4f103fc..179d26709c94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include <linux/io-mapping.h> #include <linux/delay.h> #include <linux/netdevice.h> +#include <linux/kmod.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> @@ -650,6 +651,27 @@ err_mem: return err; } +static void mlx4_request_modules(struct mlx4_dev *dev) +{ + int port; + int has_ib_port = false; + int has_eth_port = false; +#define EN_DRV_NAME "mlx4_en" +#define IB_DRV_NAME "mlx4_ib" + + for (port = 1; port <= dev->caps.num_ports; port++) { + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) + has_ib_port = true; + else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + has_eth_port = true; + } + + if (has_ib_port) + request_module_nowait(IB_DRV_NAME); + if (has_eth_port) + request_module_nowait(EN_DRV_NAME); +} + /* * Change the port configuration of the device. * Every user of this function must hold the port mutex. @@ -681,6 +703,11 @@ int mlx4_change_port_types(struct mlx4_dev *dev, } mlx4_set_port_mask(dev); err = mlx4_register_device(dev); + if (err) { + mlx4_err(dev, "Failed to register device\n"); + goto out; + } + mlx4_request_modules(dev); } out: @@ -2305,6 +2332,8 @@ slave_start: if (err) goto err_port; + mlx4_request_modules(dev); + mlx4_sense_init(dev); mlx4_start_sense(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 55f6245efb6c..70f0213d68c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -645,7 +645,7 @@ static const u8 __promisc_mode[] = { int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, enum mlx4_net_trans_promisc_mode flow_type) { - if (flow_type >= MLX4_FS_MODE_NUM || flow_type < 0) { + if (flow_type >= MLX4_FS_MODE_NUM) { mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type); return -EINVAL; } @@ -681,7 +681,7 @@ const u16 __sw_id_hw[] = { int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id) { - if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { + if (id >= MLX4_NET_TRANS_RULE_NUM) { mlx4_err(dev, "Invalid network rule id. id = %d\n", id); return -EINVAL; } @@ -706,7 +706,7 @@ static const int __rule_hw_sz[] = { int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id) { - if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { + if (id >= MLX4_NET_TRANS_RULE_NUM) { mlx4_err(dev, "Invalid network rule id. id = %d\n", id); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 79fd269e2c54..9e08e35ce351 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -34,6 +34,7 @@ #include <linux/init.h> #include <linux/mlx4/cmd.h> +#include <linux/mlx4/srq.h> #include <linux/export.h> #include <linux/gfp.h> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 5715318d6bab..55b8dec86233 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -87,9 +87,13 @@ struct pending_tx_info { struct xenvif_rx_meta { int id; int size; + int gso_type; int gso_size; }; +#define GSO_BIT(type) \ + (1 << XEN_NETIF_GSO_TYPE_ ## type) + /* Discriminate from any valid pending_idx value. */ #define INVALID_PENDING_IDX 0xFFFF @@ -150,10 +154,12 @@ struct xenvif { u8 fe_dev_addr[6]; /* Frontend feature information. */ + int gso_mask; + int gso_prefix_mask; + u8 can_sg:1; - u8 gso:1; - u8 gso_prefix:1; - u8 csum:1; + u8 ip_csum:1; + u8 ipv6_csum:1; /* Internal feature information. */ u8 can_queue:1; /* can queue packets for receiver? */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 01bb854c7f62..e4aa26748f80 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -214,10 +214,14 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev, if (!vif->can_sg) features &= ~NETIF_F_SG; - if (!vif->gso && !vif->gso_prefix) + if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4)) features &= ~NETIF_F_TSO; - if (!vif->csum) + if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6)) + features &= ~NETIF_F_TSO6; + if (!vif->ip_csum) features &= ~NETIF_F_IP_CSUM; + if (!vif->ipv6_csum) + features &= ~NETIF_F_IPV6_CSUM; return features; } @@ -306,7 +310,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->domid = domid; vif->handle = handle; vif->can_sg = 1; - vif->csum = 1; + vif->ip_csum = 1; vif->dev = dev; vif->credit_bytes = vif->remaining_credit = ~0UL; @@ -316,8 +320,10 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->credit_timeout.expires = jiffies; dev->netdev_ops = &xenvif_netdev_ops; - dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; - dev->features = dev->hw_features; + dev->hw_features = NETIF_F_SG | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_TSO | NETIF_F_TSO6; + dev->features = dev->hw_features | NETIF_F_RXCSUM; SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops); dev->tx_queue_len = XENVIF_QUEUE_LENGTH; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f3e591c611de..828fdab4f1a4 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); } -/* - * This is the amount of packet we copy rather than map, so that the - * guest can't fiddle with the contents of the headers while we do - * packet processing on them (netfilter, routing, etc). +/* This is a miniumum size for the linear area to avoid lots of + * calls to __pskb_pull_tail() as we set up checksum offsets. The + * value 128 was chosen as it covers all IPv4 and most likely + * IPv6 headers. */ -#define PKT_PROT_LEN (ETH_HLEN + \ - VLAN_HLEN + \ - sizeof(struct iphdr) + MAX_IPOPTLEN + \ - sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) +#define PKT_PROT_LEN 128 static u16 frag_get_pending_idx(skb_frag_t *frag) { @@ -145,7 +142,7 @@ static int max_required_rx_slots(struct xenvif *vif) int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ - if (vif->can_sg || vif->gso || vif->gso_prefix) + if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask) max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ return max; @@ -317,6 +314,7 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; + meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; meta->size = 0; meta->id = req->id; @@ -339,6 +337,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, struct gnttab_copy *copy_gop; struct xenvif_rx_meta *meta; unsigned long bytes; + int gso_type; /* Data must not cross a page boundary. */ BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); @@ -397,7 +396,14 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, } /* Leave a gap for the GSO descriptor. */ - if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix) + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + else + gso_type = XEN_NETIF_GSO_TYPE_NONE; + + if (*head && ((1 << gso_type) & vif->gso_mask)) vif->rx.req_cons++; *head = 0; /* There must be something in this buffer now. */ @@ -428,14 +434,28 @@ static int xenvif_gop_skb(struct sk_buff *skb, unsigned char *data; int head = 1; int old_meta_prod; + int gso_type; + int gso_size; old_meta_prod = npo->meta_prod; + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + gso_size = skb_shinfo(skb)->gso_size; + } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + gso_size = skb_shinfo(skb)->gso_size; + } else { + gso_type = XEN_NETIF_GSO_TYPE_NONE; + gso_size = 0; + } + /* Set up a GSO prefix descriptor, if necessary */ - if (skb_shinfo(skb)->gso_size && vif->gso_prefix) { + if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) { req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; - meta->gso_size = skb_shinfo(skb)->gso_size; + meta->gso_type = gso_type; + meta->gso_size = gso_size; meta->size = 0; meta->id = req->id; } @@ -443,10 +463,13 @@ static int xenvif_gop_skb(struct sk_buff *skb, req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; - if (!vif->gso_prefix) - meta->gso_size = skb_shinfo(skb)->gso_size; - else + if ((1 << gso_type) & vif->gso_mask) { + meta->gso_type = gso_type; + meta->gso_size = gso_size; + } else { + meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; + } meta->size = 0; meta->id = req->id; @@ -592,7 +615,8 @@ void xenvif_rx_action(struct xenvif *vif) vif = netdev_priv(skb->dev); - if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) { + if ((1 << vif->meta[npo.meta_cons].gso_type) & + vif->gso_prefix_mask) { resp = RING_GET_RESPONSE(&vif->rx, vif->rx.rsp_prod_pvt++); @@ -629,7 +653,8 @@ void xenvif_rx_action(struct xenvif *vif) vif->meta[npo.meta_cons].size, flags); - if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) { + if ((1 << vif->meta[npo.meta_cons].gso_type) & + vif->gso_mask) { struct xen_netif_extra_info *gso = (struct xen_netif_extra_info *) RING_GET_RESPONSE(&vif->rx, @@ -637,8 +662,8 @@ void xenvif_rx_action(struct xenvif *vif) resp->flags |= XEN_NETRXF_extra_info; + gso->u.gso.type = vif->meta[npo.meta_cons].gso_type; gso->u.gso.size = vif->meta[npo.meta_cons].gso_size; - gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; @@ -1101,15 +1126,20 @@ static int xenvif_set_skb_gso(struct xenvif *vif, return -EINVAL; } - /* Currently only TCPv4 S.O. is supported. */ - if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { + switch (gso->u.gso.type) { + case XEN_NETIF_GSO_TYPE_TCPV4: + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + break; + case XEN_NETIF_GSO_TYPE_TCPV6: + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + break; + default: netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); xenvif_fatal_tx_err(vif); return -EINVAL; } skb_shinfo(skb)->gso_size = gso->u.gso.size; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; @@ -1118,61 +1148,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif, return 0; } -static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) +{ + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { + /* If we need to pullup then pullup to the max, so we + * won't need to do it again. + */ + int target = min_t(int, skb->len, MAX_TCP_HEADER); + __pskb_pull_tail(skb, target - skb_headlen(skb)); + } +} + +static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, + int recalculate_partial_csum) { - struct iphdr *iph; + struct iphdr *iph = (void *)skb->data; + unsigned int header_size; + unsigned int off; int err = -EPROTO; - int recalculate_partial_csum = 0; - /* - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy - * peers can fail to set NETRXF_csum_blank when sending a GSO - * frame. In this case force the SKB to CHECKSUM_PARTIAL and - * recalculate the partial checksum. - */ - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { - vif->rx_gso_checksum_fixup++; - skb->ip_summed = CHECKSUM_PARTIAL; - recalculate_partial_csum = 1; - } + off = sizeof(struct iphdr); - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; + header_size = skb->network_header + off + MAX_IPOPTLEN; + maybe_pull_tail(skb, header_size); - if (skb->protocol != htons(ETH_P_IP)) - goto out; + off = iph->ihl * 4; - iph = (void *)skb->data; switch (iph->protocol) { case IPPROTO_TCP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) goto out; if (recalculate_partial_csum) { struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr); + maybe_pull_tail(skb, header_size); + tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_TCP, 0); } break; case IPPROTO_UDP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct udphdr, check))) goto out; if (recalculate_partial_csum) { struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_UDP, 0); } break; default: if (net_ratelimit()) netdev_err(vif->dev, - "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", + "Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", iph->protocol); goto out; } @@ -1183,6 +1226,158 @@ out: return err; } +static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, + int recalculate_partial_csum) +{ + int err = -EPROTO; + struct ipv6hdr *ipv6h = (void *)skb->data; + u8 nexthdr; + unsigned int header_size; + unsigned int off; + bool fragment; + bool done; + + done = false; + + off = sizeof(struct ipv6hdr); + + header_size = skb->network_header + off; + maybe_pull_tail(skb, header_size); + + nexthdr = ipv6h->nexthdr; + + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && + !done) { + switch (nexthdr) { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: { + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); + + header_size = skb->network_header + + off + + sizeof(struct ipv6_opt_hdr); + maybe_pull_tail(skb, header_size); + + nexthdr = hp->nexthdr; + off += ipv6_optlen(hp); + break; + } + case IPPROTO_AH: { + struct ip_auth_hdr *hp = (void *)(skb->data + off); + + header_size = skb->network_header + + off + + sizeof(struct ip_auth_hdr); + maybe_pull_tail(skb, header_size); + + nexthdr = hp->nexthdr; + off += (hp->hdrlen+2)<<2; + break; + } + case IPPROTO_FRAGMENT: + fragment = true; + /* fall through */ + default: + done = true; + break; + } + } + + if (!done) { + if (net_ratelimit()) + netdev_err(vif->dev, "Failed to parse packet header\n"); + goto out; + } + + if (fragment) { + if (net_ratelimit()) + netdev_err(vif->dev, "Packet is a fragment!\n"); + goto out; + } + + switch (nexthdr) { + case IPPROTO_TCP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr); + maybe_pull_tail(skb, header_size); + + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_TCP, 0); + } + break; + case IPPROTO_UDP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_UDP, 0); + } + break; + default: + if (net_ratelimit()) + netdev_err(vif->dev, + "Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", + nexthdr); + goto out; + } + + err = 0; + +out: + return err; +} + +static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) +{ + int err = -EPROTO; + int recalculate_partial_csum = 0; + + /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy + * peers can fail to set NETRXF_csum_blank when sending a GSO + * frame. In this case force the SKB to CHECKSUM_PARTIAL and + * recalculate the partial checksum. + */ + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { + vif->rx_gso_checksum_fixup++; + skb->ip_summed = CHECKSUM_PARTIAL; + recalculate_partial_csum = 1; + } + + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (skb->protocol == htons(ETH_P_IP)) + err = checksum_setup_ip(vif, skb, recalculate_partial_csum); + else if (skb->protocol == htons(ETH_P_IPV6)) + err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum); + + return err; +} + static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) { unsigned long now = jiffies; @@ -1428,12 +1623,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget) xenvif_fill_frags(vif, skb); - /* - * If the initial fragment was < PKT_PROT_LEN then - * pull through some bytes from the other fragments to - * increase the linear region to PKT_PROT_LEN bytes. - */ - if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { + if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { int target = min_t(int, skb->len, PKT_PROT_LEN); __pskb_pull_tail(skb, target - skb_headlen(skb)); } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 1b08d8798372..f0358992b04f 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -105,6 +105,22 @@ static int netback_probe(struct xenbus_device *dev, goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", + "%d", sg); + if (err) { + message = "writing feature-gso-tcpv6"; + goto abort_transaction; + } + + /* We support partial checksum setup for IPv6 packets */ + err = xenbus_printf(xbt, dev->nodename, + "feature-ipv6-csum-offload", + "%d", 1); + if (err) { + message = "writing feature-ipv6-csum-offload"; + goto abort_transaction; + } + /* We support rx-copy path. */ err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1); @@ -561,20 +577,50 @@ static int connect_rings(struct backend_info *be) val = 0; vif->can_sg = !!val; + vif->gso_mask = 0; + vif->gso_prefix_mask = 0; + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d", &val) < 0) val = 0; - vif->gso = !!val; + if (val) + vif->gso_mask |= GSO_BIT(TCPV4); if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix", "%d", &val) < 0) val = 0; - vif->gso_prefix = !!val; + if (val) + vif->gso_prefix_mask |= GSO_BIT(TCPV4); + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6", + "%d", &val) < 0) + val = 0; + if (val) + vif->gso_mask |= GSO_BIT(TCPV6); + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-prefix", + "%d", &val) < 0) + val = 0; + if (val) + vif->gso_prefix_mask |= GSO_BIT(TCPV6); + + if (vif->gso_mask & vif->gso_prefix_mask) { + xenbus_dev_fatal(dev, err, + "%s: gso and gso prefix flags are not " + "mutually exclusive", + dev->otherend); + return -EOPNOTSUPP; + } if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", "%d", &val) < 0) val = 0; - vif->csum = !val; + vif->ip_csum = !val; + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-ipv6-csum-offload", + "%d", &val) < 0) + val = 0; + vif->ipv6_csum = !!val; /* Map the shared frame, irq etc. */ err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index cd1fdf75103b..8df61bc5da00 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -154,10 +154,6 @@ enum { MLX4_CMD_QUERY_IF_STAT = 0X54, MLX4_CMD_SET_IF_STAT = 0X55, - /* set port opcode modifiers */ - MLX4_SET_PORT_PRIO2TC = 0x8, - MLX4_SET_PORT_SCHEDULER = 0x9, - /* register/delete flow steering network rules */ MLX4_QP_FLOW_STEERING_ATTACH = 0x65, MLX4_QP_FLOW_STEERING_DETACH = 0x66, @@ -182,6 +178,8 @@ enum { MLX4_SET_PORT_VLAN_TABLE = 0x3, MLX4_SET_PORT_PRIO_MAP = 0x4, MLX4_SET_PORT_GID_TABLE = 0x5, + MLX4_SET_PORT_PRIO2TC = 0x8, + MLX4_SET_PORT_SCHEDULER = 0x9, }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 24ce6bdd540e..9ad0c18495ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -155,7 +155,7 @@ enum { MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, - MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4, + MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN = 1LL << 4, MLX4_DEV_CAP_FLAG2_TS = 1LL << 5, MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6, MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7, diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 61223c52414f..2077489f9887 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -42,7 +42,8 @@ int netfilter_init(void); struct sk_buff; -typedef unsigned int nf_hookfn(unsigned int hooknum, +struct nf_hook_ops; +typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -52,12 +53,13 @@ struct nf_hook_ops { struct list_head list; /* User fills in from here down. */ - nf_hookfn *hook; - struct module *owner; - u_int8_t pf; - unsigned int hooknum; + nf_hookfn *hook; + struct module *owner; + void *priv; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ - int priority; + int priority; }; struct nf_sockopt_ops { diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 4f68cd7141d2..28c74367e900 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -14,6 +14,9 @@ struct nfnl_callback { int (*call_rcu)(struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]); + int (*call_batch)(struct sock *nl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; @@ -23,6 +26,8 @@ struct nfnetlink_subsystem { __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ const struct nfnl_callback *cb; /* callback for individual types */ + int (*commit)(struct sk_buff *skb); + int (*abort)(struct sk_buff *skb); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index b647c6270eb7..71c6e264e5b5 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -135,7 +135,7 @@ struct inet_timewait_sock { tw_transparent : 1, tw_pad : 6, /* 6 bits hole */ tw_tos : 8, - tw_pad2 : 16 /* 16 bits hole */ + tw_pad2 : 16; /* 16 bits hole */ kmemcheck_bitfield_end(flags); u32 tw_ttd; struct inet_bind_bucket *tw_tb; diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h index 94c852d47d0f..11417475a6c3 100644 --- a/include/net/irda/irda_device.h +++ b/include/net/irda/irda_device.h @@ -162,7 +162,7 @@ typedef struct { int irq, irq2; /* Interrupts used */ int dma, dma2; /* DMA channel(s) used */ int fifo_size; /* FIFO size */ - int irqflags; /* interrupt flags (ie, IRQF_SHARED|IRQF_DISABLED) */ + int irqflags; /* interrupt flags (ie, IRQF_SHARED) */ int direction; /* Link direction, used by some FIR drivers */ int enabled; /* Powered on? */ int suspended; /* Suspended by APM */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bcc4a8ed4450..da68c9a90ac5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -22,6 +22,7 @@ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netns/conntrack.h> #endif +#include <net/netns/nftables.h> #include <net/netns/xfrm.h> struct user_namespace; @@ -101,6 +102,9 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif +#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) + struct netns_nftables nft; +#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag nf_frag; #endif diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index c29b4e545f87..07eaaf604092 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -45,6 +45,9 @@ unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype); +extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, + unsigned int hooknum); + /* Is this tuple already taken? (not by us)*/ int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h new file mode 100644 index 000000000000..54c4a5cafb64 --- /dev/null +++ b/include/net/netfilter/nf_tables.h @@ -0,0 +1,522 @@ +#ifndef _NET_NF_TABLES_H +#define _NET_NF_TABLES_H + +#include <linux/list.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netlink.h> + +#define NFT_JUMP_STACK_SIZE 16 + +struct nft_pktinfo { + struct sk_buff *skb; + const struct net_device *in; + const struct net_device *out; + u8 hooknum; + u8 nhoff; + u8 thoff; + /* for x_tables compatibility */ + struct xt_action_param xt; +}; + +static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + pkt->skb = skb; + pkt->in = pkt->xt.in = in; + pkt->out = pkt->xt.out = out; + pkt->hooknum = pkt->xt.hooknum = ops->hooknum; + pkt->xt.family = ops->pf; +} + +struct nft_data { + union { + u32 data[4]; + struct { + u32 verdict; + struct nft_chain *chain; + }; + }; +} __attribute__((aligned(__alignof__(u64)))); + +static inline int nft_data_cmp(const struct nft_data *d1, + const struct nft_data *d2, + unsigned int len) +{ + return memcmp(d1->data, d2->data, len); +} + +static inline void nft_data_copy(struct nft_data *dst, + const struct nft_data *src) +{ + BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64)); + *(u64 *)&dst->data[0] = *(u64 *)&src->data[0]; + *(u64 *)&dst->data[2] = *(u64 *)&src->data[2]; +} + +static inline void nft_data_debug(const struct nft_data *data) +{ + pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n", + data->data[0], data->data[1], + data->data[2], data->data[3]); +} + +/** + * struct nft_ctx - nf_tables rule/set context + * + * @net: net namespace + * @skb: netlink skb + * @nlh: netlink message header + * @afi: address family info + * @table: the table the chain is contained in + * @chain: the chain the rule is contained in + * @nla: netlink attributes + */ +struct nft_ctx { + struct net *net; + const struct sk_buff *skb; + const struct nlmsghdr *nlh; + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nlattr * const *nla; +}; + +struct nft_data_desc { + enum nft_data_types type; + unsigned int len; +}; + +extern int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla); +extern void nft_data_uninit(const struct nft_data *data, + enum nft_data_types type); +extern int nft_data_dump(struct sk_buff *skb, int attr, + const struct nft_data *data, + enum nft_data_types type, unsigned int len); + +static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) +{ + return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; +} + +static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) +{ + return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1; +} + +extern int nft_validate_input_register(enum nft_registers reg); +extern int nft_validate_output_register(enum nft_registers reg); +extern int nft_validate_data_load(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type); + +/** + * struct nft_set_elem - generic representation of set elements + * + * @cookie: implementation specific element cookie + * @key: element key + * @data: element data (maps only) + * @flags: element flags (end of interval) + * + * The cookie can be used to store a handle to the element for subsequent + * removal. + */ +struct nft_set_elem { + void *cookie; + struct nft_data key; + struct nft_data data; + u32 flags; +}; + +struct nft_set; +struct nft_set_iter { + unsigned int count; + unsigned int skip; + int err; + int (*fn)(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem); +}; + +/** + * struct nft_set_ops - nf_tables set operations + * + * @lookup: look up an element within the set + * @insert: insert new element into set + * @remove: remove element from set + * @walk: iterate over all set elemeennts + * @privsize: function to return size of set private data + * @init: initialize private data of new set instance + * @destroy: destroy private data of set instance + * @list: nf_tables_set_ops list node + * @owner: module reference + * @features: features supported by the implementation + */ +struct nft_set_ops { + bool (*lookup)(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data); + int (*get)(const struct nft_set *set, + struct nft_set_elem *elem); + int (*insert)(const struct nft_set *set, + const struct nft_set_elem *elem); + void (*remove)(const struct nft_set *set, + const struct nft_set_elem *elem); + void (*walk)(const struct nft_ctx *ctx, + const struct nft_set *set, + struct nft_set_iter *iter); + + unsigned int (*privsize)(const struct nlattr * const nla[]); + int (*init)(const struct nft_set *set, + const struct nlattr * const nla[]); + void (*destroy)(const struct nft_set *set); + + struct list_head list; + struct module *owner; + u32 features; +}; + +extern int nft_register_set(struct nft_set_ops *ops); +extern void nft_unregister_set(struct nft_set_ops *ops); + +/** + * struct nft_set - nf_tables set instance + * + * @list: table set list node + * @bindings: list of set bindings + * @name: name of the set + * @ktype: key type (numeric type defined by userspace, not used in the kernel) + * @dtype: data type (verdict or numeric type defined by userspace) + * @ops: set ops + * @flags: set flags + * @klen: key length + * @dlen: data length + * @data: private set data + */ +struct nft_set { + struct list_head list; + struct list_head bindings; + char name[IFNAMSIZ]; + u32 ktype; + u32 dtype; + /* runtime data below here */ + const struct nft_set_ops *ops ____cacheline_aligned; + u16 flags; + u8 klen; + u8 dlen; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_set_priv(const struct nft_set *set) +{ + return (void *)set->data; +} + +extern struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla); + +/** + * struct nft_set_binding - nf_tables set binding + * + * @list: set bindings list node + * @chain: chain containing the rule bound to the set + * + * A set binding contains all information necessary for validation + * of new elements added to a bound set. + */ +struct nft_set_binding { + struct list_head list; + const struct nft_chain *chain; +}; + +extern int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); +extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); + + +/** + * struct nft_expr_type - nf_tables expression type + * + * @select_ops: function to select nft_expr_ops + * @ops: default ops, used when no select_ops functions is present + * @list: used internally + * @name: Identifier + * @owner: module reference + * @policy: netlink attribute policy + * @maxattr: highest netlink attribute number + */ +struct nft_expr_type { + const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, + const struct nlattr * const tb[]); + const struct nft_expr_ops *ops; + struct list_head list; + const char *name; + struct module *owner; + const struct nla_policy *policy; + unsigned int maxattr; +}; + +/** + * struct nft_expr_ops - nf_tables expression operations + * + * @eval: Expression evaluation function + * @size: full expression size, including private data size + * @init: initialization function + * @destroy: destruction function + * @dump: function to dump parameters + * @type: expression type + * @validate: validate expression, called during loop detection + * @data: extra data to attach to this expression operation + */ +struct nft_expr; +struct nft_expr_ops { + void (*eval)(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt); + unsigned int size; + + int (*init)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + void (*destroy)(const struct nft_expr *expr); + int (*dump)(struct sk_buff *skb, + const struct nft_expr *expr); + int (*validate)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); + const struct nft_expr_type *type; + void *data; +}; + +#define NFT_EXPR_MAXATTR 16 +#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ + ALIGN(size, __alignof__(struct nft_expr))) + +/** + * struct nft_expr - nf_tables expression + * + * @ops: expression ops + * @data: expression private data + */ +struct nft_expr { + const struct nft_expr_ops *ops; + unsigned char data[]; +}; + +static inline void *nft_expr_priv(const struct nft_expr *expr) +{ + return (void *)expr->data; +} + +/** + * struct nft_rule - nf_tables rule + * + * @list: used internally + * @rcu_head: used internally for rcu + * @handle: rule handle + * @genmask: generation mask + * @dlen: length of expression data + * @data: expression data + */ +struct nft_rule { + struct list_head list; + struct rcu_head rcu_head; + u64 handle:46, + genmask:2, + dlen:16; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +/** + * struct nft_rule_trans - nf_tables rule update in transaction + * + * @list: used internally + * @rule: rule that needs to be updated + * @chain: chain that this rule belongs to + * @table: table for which this chain applies + * @nlh: netlink header of the message that contain this update + * @family: family expressesed as AF_* + */ +struct nft_rule_trans { + struct list_head list; + struct nft_rule *rule; + const struct nft_chain *chain; + const struct nft_table *table; + const struct nlmsghdr *nlh; + u8 family; +}; + +static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[0]; +} + +static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr) +{ + return ((void *)expr) + expr->ops->size; +} + +static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[rule->dlen]; +} + +/* + * The last pointer isn't really necessary, but the compiler isn't able to + * determine that the result of nft_expr_last() is always the same since it + * can't assume that the dlen value wasn't changed within calls in the loop. + */ +#define nft_rule_for_each_expr(expr, last, rule) \ + for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \ + (expr) != (last); \ + (expr) = nft_expr_next(expr)) + +enum nft_chain_flags { + NFT_BASE_CHAIN = 0x1, +}; + +/** + * struct nft_chain - nf_tables chain + * + * @rules: list of rules in the chain + * @list: used internally + * @rcu_head: used internally + * @net: net namespace that this chain belongs to + * @table: table that this chain belongs to + * @handle: chain handle + * @flags: bitmask of enum nft_chain_flags + * @use: number of jump references to this chain + * @level: length of longest path to this chain + * @name: name of the chain + */ +struct nft_chain { + struct list_head rules; + struct list_head list; + struct rcu_head rcu_head; + struct net *net; + struct nft_table *table; + u64 handle; + u8 flags; + u16 use; + u16 level; + char name[NFT_CHAIN_MAXNAMELEN]; +}; + +enum nft_chain_type { + NFT_CHAIN_T_DEFAULT = 0, + NFT_CHAIN_T_ROUTE, + NFT_CHAIN_T_NAT, + NFT_CHAIN_T_MAX +}; + +struct nft_stats { + u64 bytes; + u64 pkts; +}; + +/** + * struct nft_base_chain - nf_tables base chain + * + * @ops: netfilter hook ops + * @type: chain type + * @policy: default policy + * @stats: per-cpu chain stats + * @chain: the chain + */ +struct nft_base_chain { + struct nf_hook_ops ops; + enum nft_chain_type type; + u8 policy; + struct nft_stats __percpu *stats; + struct nft_chain chain; +}; + +static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) +{ + return container_of(chain, struct nft_base_chain, chain); +} + +extern unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops); + +/** + * struct nft_table - nf_tables table + * + * @list: used internally + * @chains: chains in the table + * @sets: sets in the table + * @hgenerator: handle generator state + * @use: number of chain references to this table + * @flags: table flag (see enum nft_table_flags) + * @name: name of the table + */ +struct nft_table { + struct list_head list; + struct list_head chains; + struct list_head sets; + u64 hgenerator; + u32 use; + u16 flags; + char name[]; +}; + +/** + * struct nft_af_info - nf_tables address family info + * + * @list: used internally + * @family: address family + * @nhooks: number of hooks in this family + * @owner: module owner + * @tables: used internally + * @hooks: hookfn overrides for packet validation + */ +struct nft_af_info { + struct list_head list; + int family; + unsigned int nhooks; + struct module *owner; + struct list_head tables; + nf_hookfn *hooks[NF_MAX_HOOKS]; +}; + +extern int nft_register_afinfo(struct net *, struct nft_af_info *); +extern void nft_unregister_afinfo(struct nft_af_info *); + +struct nf_chain_type { + unsigned int hook_mask; + const char *name; + enum nft_chain_type type; + nf_hookfn *fn[NF_MAX_HOOKS]; + struct module *me; + int family; +}; + +extern int nft_register_chain_type(struct nf_chain_type *); +extern void nft_unregister_chain_type(struct nf_chain_type *); + +extern int nft_register_expr(struct nft_expr_type *); +extern void nft_unregister_expr(struct nft_expr_type *); + +#define MODULE_ALIAS_NFT_FAMILY(family) \ + MODULE_ALIAS("nft-afinfo-" __stringify(family)) + +#define MODULE_ALIAS_NFT_CHAIN(family, name) \ + MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) + +#define MODULE_ALIAS_NFT_EXPR(name) \ + MODULE_ALIAS("nft-expr-" name) + +#define MODULE_ALIAS_NFT_SET() \ + MODULE_ALIAS("nft-set") + +#endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h new file mode 100644 index 000000000000..fe7b16206a4e --- /dev/null +++ b/include/net/netfilter/nf_tables_core.h @@ -0,0 +1,42 @@ +#ifndef _NET_NF_TABLES_CORE_H +#define _NET_NF_TABLES_CORE_H + +extern int nf_tables_core_module_init(void); +extern void nf_tables_core_module_exit(void); + +extern int nft_immediate_module_init(void); +extern void nft_immediate_module_exit(void); + +struct nft_cmp_fast_expr { + u32 data; + enum nft_registers sreg:8; + u8 len; +}; + +extern const struct nft_expr_ops nft_cmp_fast_ops; + +extern int nft_cmp_module_init(void); +extern void nft_cmp_module_exit(void); + +extern int nft_lookup_module_init(void); +extern void nft_lookup_module_exit(void); + +extern int nft_bitwise_module_init(void); +extern void nft_bitwise_module_exit(void); + +extern int nft_byteorder_module_init(void); +extern void nft_byteorder_module_exit(void); + +struct nft_payload { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +extern const struct nft_expr_ops nft_payload_fast_ops; + +extern int nft_payload_module_init(void); +extern void nft_payload_module_exit(void); + +#endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h new file mode 100644 index 000000000000..1be1c2c197ee --- /dev/null +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -0,0 +1,23 @@ +#ifndef _NF_TABLES_IPV4_H_ +#define _NF_TABLES_IPV4_H_ + +#include <net/netfilter/nf_tables.h> +#include <net/ip.h> + +static inline void +nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + struct iphdr *ip; + + nft_set_pktinfo(pkt, ops, skb, in, out); + + pkt->xt.thoff = ip_hdrlen(pkt->skb); + ip = ip_hdr(pkt->skb); + pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; +} + +#endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h new file mode 100644 index 000000000000..4a9b88a65963 --- /dev/null +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -0,0 +1,30 @@ +#ifndef _NF_TABLES_IPV6_H_ +#define _NF_TABLES_IPV6_H_ + +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <net/ipv6.h> + +static inline int +nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + int protohdr, thoff = 0; + unsigned short frag_off; + + nft_set_pktinfo(pkt, ops, skb, in, out); + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + /* If malformed, drop it */ + if (protohdr < 0) + return -1; + + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; + + return 0; +} + +#endif diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h new file mode 100644 index 000000000000..15d056d534e3 --- /dev/null +++ b/include/net/netns/nftables.h @@ -0,0 +1,19 @@ +#ifndef _NETNS_NFTABLES_H_ +#define _NETNS_NFTABLES_H_ + +#include <linux/list.h> + +struct nft_af_info; + +struct netns_nftables { + struct list_head af_info; + struct list_head commit_list; + struct nft_af_info *ipv4; + struct nft_af_info *ipv6; + struct nft_af_info *arp; + struct nft_af_info *bridge; + u8 gencursor; + u8 genctr; +}; + +#endif diff --git a/include/net/route.h b/include/net/route.h index 0ad8e0102386..dd4ae0029fd8 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -88,22 +88,14 @@ struct ip_rt_acct { }; struct rt_cache_stat { - unsigned int in_hit; unsigned int in_slow_tot; unsigned int in_slow_mc; unsigned int in_no_route; unsigned int in_brd; unsigned int in_martian_dst; unsigned int in_martian_src; - unsigned int out_hit; unsigned int out_slow_tot; unsigned int out_slow_mc; - unsigned int gc_total; - unsigned int gc_ignored; - unsigned int gc_goal_miss; - unsigned int gc_dst_overflow; - unsigned int in_hlist_search; - unsigned int out_hlist_search; }; extern struct ip_rt_acct __percpu *ip_rt_acct; diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 174915420d3f..17c3af2c4bb9 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -5,6 +5,8 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_tables.h +header-y += nf_tables_compat.h header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_acct.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 8dd803818ebe..319f47128db8 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -25,6 +25,10 @@ enum ip_conntrack_info { IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; +#define NF_CT_STATE_INVALID_BIT (1 << 0) +#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) + /* Bitset representing status of connection. */ enum ip_conntrack_status { /* It's an expected connection: bit 0 set. This bit never changed */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h new file mode 100644 index 000000000000..fbfd229a8e99 --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -0,0 +1,718 @@ +#ifndef _LINUX_NF_TABLES_H +#define _LINUX_NF_TABLES_H + +#define NFT_CHAIN_MAXNAMELEN 32 + +enum nft_registers { + NFT_REG_VERDICT, + NFT_REG_1, + NFT_REG_2, + NFT_REG_3, + NFT_REG_4, + __NFT_REG_MAX +}; +#define NFT_REG_MAX (__NFT_REG_MAX - 1) + +/** + * enum nft_verdicts - nf_tables internal verdicts + * + * @NFT_CONTINUE: continue evaluation of the current rule + * @NFT_BREAK: terminate evaluation of the current rule + * @NFT_JUMP: push the current chain on the jump stack and jump to a chain + * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack + * @NFT_RETURN: return to the topmost chain on the jump stack + * + * The nf_tables verdicts share their numeric space with the netfilter verdicts. + */ +enum nft_verdicts { + NFT_CONTINUE = -1, + NFT_BREAK = -2, + NFT_JUMP = -3, + NFT_GOTO = -4, + NFT_RETURN = -5, +}; + +/** + * enum nf_tables_msg_types - nf_tables netlink message types + * + * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes) + * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes) + * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes) + * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes) + * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes) + * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes) + * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) + * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) + * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes) + * @NFT_MSG_GETSET: get a set (enum nft_set_attributes) + * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes) + * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes) + * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes) + * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) + */ +enum nf_tables_msg_types { + NFT_MSG_NEWTABLE, + NFT_MSG_GETTABLE, + NFT_MSG_DELTABLE, + NFT_MSG_NEWCHAIN, + NFT_MSG_GETCHAIN, + NFT_MSG_DELCHAIN, + NFT_MSG_NEWRULE, + NFT_MSG_GETRULE, + NFT_MSG_DELRULE, + NFT_MSG_NEWSET, + NFT_MSG_GETSET, + NFT_MSG_DELSET, + NFT_MSG_NEWSETELEM, + NFT_MSG_GETSETELEM, + NFT_MSG_DELSETELEM, + NFT_MSG_MAX, +}; + +/** + * enum nft_list_attributes - nf_tables generic list netlink attributes + * + * @NFTA_LIST_ELEM: list element (NLA_NESTED) + */ +enum nft_list_attributes { + NFTA_LIST_UNPEC, + NFTA_LIST_ELEM, + __NFTA_LIST_MAX +}; +#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1) + +/** + * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes + * + * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + */ +enum nft_hook_attributes { + NFTA_HOOK_UNSPEC, + NFTA_HOOK_HOOKNUM, + NFTA_HOOK_PRIORITY, + __NFTA_HOOK_MAX +}; +#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) + +/** + * enum nft_table_flags - nf_tables table flags + * + * @NFT_TABLE_F_DORMANT: this table is not active + */ +enum nft_table_flags { + NFT_TABLE_F_DORMANT = 0x1, +}; + +/** + * enum nft_table_attributes - nf_tables table netlink attributes + * + * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) + */ +enum nft_table_attributes { + NFTA_TABLE_UNSPEC, + NFTA_TABLE_NAME, + NFTA_TABLE_FLAGS, + __NFTA_TABLE_MAX +}; +#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) + +/** + * enum nft_chain_attributes - nf_tables chain netlink attributes + * + * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING) + * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) + * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) + * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_POLICY: numeric policy of the chain (NLA_U32) + * @NFTA_CHAIN_USE: number of references to this chain (NLA_U32) + * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) + * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) + */ +enum nft_chain_attributes { + NFTA_CHAIN_UNSPEC, + NFTA_CHAIN_TABLE, + NFTA_CHAIN_HANDLE, + NFTA_CHAIN_NAME, + NFTA_CHAIN_HOOK, + NFTA_CHAIN_POLICY, + NFTA_CHAIN_USE, + NFTA_CHAIN_TYPE, + NFTA_CHAIN_COUNTERS, + __NFTA_CHAIN_MAX +}; +#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) + +/** + * enum nft_rule_attributes - nf_tables rule netlink attributes + * + * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING) + * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) + * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) + * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) + */ +enum nft_rule_attributes { + NFTA_RULE_UNSPEC, + NFTA_RULE_TABLE, + NFTA_RULE_CHAIN, + NFTA_RULE_HANDLE, + NFTA_RULE_EXPRESSIONS, + NFTA_RULE_COMPAT, + NFTA_RULE_POSITION, + __NFTA_RULE_MAX +}; +#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) + +/** + * enum nft_rule_compat_flags - nf_tables rule compat flags + * + * @NFT_RULE_COMPAT_F_INV: invert the check result + */ +enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_INV = (1 << 1), + NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, +}; + +/** + * enum nft_rule_compat_attributes - nf_tables rule compat attributes + * + * @NFTA_RULE_COMPAT_PROTO: numerice value of handled protocol (NLA_U32) + * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32) + */ +enum nft_rule_compat_attributes { + NFTA_RULE_COMPAT_UNSPEC, + NFTA_RULE_COMPAT_PROTO, + NFTA_RULE_COMPAT_FLAGS, + __NFTA_RULE_COMPAT_MAX +}; +#define NFTA_RULE_COMPAT_MAX (__NFTA_RULE_COMPAT_MAX - 1) + +/** + * enum nft_set_flags - nf_tables set flags + * + * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink + * @NFT_SET_CONSTANT: set contents may not change while bound + * @NFT_SET_INTERVAL: set contains intervals + * @NFT_SET_MAP: set is used as a dictionary + */ +enum nft_set_flags { + NFT_SET_ANONYMOUS = 0x1, + NFT_SET_CONSTANT = 0x2, + NFT_SET_INTERVAL = 0x4, + NFT_SET_MAP = 0x8, +}; + +/** + * enum nft_set_attributes - nf_tables set netlink attributes + * + * @NFTA_SET_TABLE: table name (NLA_STRING) + * @NFTA_SET_NAME: set name (NLA_STRING) + * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32) + * @NFTA_SET_KEY_TYPE: key data type, informational purpose only (NLA_U32) + * @NFTA_SET_KEY_LEN: key data length (NLA_U32) + * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32) + * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32) + */ +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_TABLE, + NFTA_SET_NAME, + NFTA_SET_FLAGS, + NFTA_SET_KEY_TYPE, + NFTA_SET_KEY_LEN, + NFTA_SET_DATA_TYPE, + NFTA_SET_DATA_LEN, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +/** + * enum nft_set_elem_flags - nf_tables set element flags + * + * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval + */ +enum nft_set_elem_flags { + NFT_SET_ELEM_INTERVAL_END = 0x1, +}; + +/** + * enum nft_set_elem_attributes - nf_tables set element netlink attributes + * + * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) + * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) + * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) + */ +enum nft_set_elem_attributes { + NFTA_SET_ELEM_UNSPEC, + NFTA_SET_ELEM_KEY, + NFTA_SET_ELEM_DATA, + NFTA_SET_ELEM_FLAGS, + __NFTA_SET_ELEM_MAX +}; +#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) + +/** + * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes + * + * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes) + */ +enum nft_set_elem_list_attributes { + NFTA_SET_ELEM_LIST_UNSPEC, + NFTA_SET_ELEM_LIST_TABLE, + NFTA_SET_ELEM_LIST_SET, + NFTA_SET_ELEM_LIST_ELEMENTS, + __NFTA_SET_ELEM_LIST_MAX +}; +#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1) + +/** + * enum nft_data_types - nf_tables data types + * + * @NFT_DATA_VALUE: generic data + * @NFT_DATA_VERDICT: netfilter verdict + * + * The type of data is usually determined by the kernel directly and is not + * explicitly specified by userspace. The only difference are sets, where + * userspace specifies the key and mapping data types. + * + * The values 0xffffff00-0xffffffff are reserved for internally used types. + * The remaining range can be freely used by userspace to encode types, all + * values are equivalent to NFT_DATA_VALUE. + */ +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT = 0xffffff00U, +}; + +#define NFT_DATA_RESERVED_MASK 0xffffff00U + +/** + * enum nft_data_attributes - nf_tables data netlink attributes + * + * @NFTA_DATA_VALUE: generic data (NLA_BINARY) + * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes) + */ +enum nft_data_attributes { + NFTA_DATA_UNSPEC, + NFTA_DATA_VALUE, + NFTA_DATA_VERDICT, + __NFTA_DATA_MAX +}; +#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1) + +/** + * enum nft_verdict_attributes - nf_tables verdict netlink attributes + * + * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) + * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + */ +enum nft_verdict_attributes { + NFTA_VERDICT_UNSPEC, + NFTA_VERDICT_CODE, + NFTA_VERDICT_CHAIN, + __NFTA_VERDICT_MAX +}; +#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) + +/** + * enum nft_expr_attributes - nf_tables expression netlink attributes + * + * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING) + * @NFTA_EXPR_DATA: type specific data (NLA_NESTED) + */ +enum nft_expr_attributes { + NFTA_EXPR_UNSPEC, + NFTA_EXPR_NAME, + NFTA_EXPR_DATA, + __NFTA_EXPR_MAX +}; +#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1) + +/** + * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes + * + * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32) + * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes) + */ +enum nft_immediate_attributes { + NFTA_IMMEDIATE_UNSPEC, + NFTA_IMMEDIATE_DREG, + NFTA_IMMEDIATE_DATA, + __NFTA_IMMEDIATE_MAX +}; +#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1) + +/** + * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes + * + * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BITWISE_LEN: length of operands (NLA_U32) + * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes) + * + * The bitwise expression performs the following operation: + * + * dreg = (sreg & mask) ^ xor + * + * which allow to express all bitwise operations: + * + * mask xor + * NOT: 1 1 + * OR: 0 x + * XOR: 1 x + * AND: x 0 + */ +enum nft_bitwise_attributes { + NFTA_BITWISE_UNSPEC, + NFTA_BITWISE_SREG, + NFTA_BITWISE_DREG, + NFTA_BITWISE_LEN, + NFTA_BITWISE_MASK, + NFTA_BITWISE_XOR, + __NFTA_BITWISE_MAX +}; +#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) + +/** + * enum nft_byteorder_ops - nf_tables byteorder operators + * + * @NFT_BYTEORDER_NTOH: network to host operator + * @NFT_BYTEORDER_HTON: host to network opertaor + */ +enum nft_byteorder_ops { + NFT_BYTEORDER_NTOH, + NFT_BYTEORDER_HTON, +}; + +/** + * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes + * + * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops) + * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32) + * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4) + */ +enum nft_byteorder_attributes { + NFTA_BYTEORDER_UNSPEC, + NFTA_BYTEORDER_SREG, + NFTA_BYTEORDER_DREG, + NFTA_BYTEORDER_OP, + NFTA_BYTEORDER_LEN, + NFTA_BYTEORDER_SIZE, + __NFTA_BYTEORDER_MAX +}; +#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1) + +/** + * enum nft_cmp_ops - nf_tables relational operator + * + * @NFT_CMP_EQ: equal + * @NFT_CMP_NEQ: not equal + * @NFT_CMP_LT: less than + * @NFT_CMP_LTE: less than or equal to + * @NFT_CMP_GT: greater than + * @NFT_CMP_GTE: greater than or equal to + */ +enum nft_cmp_ops { + NFT_CMP_EQ, + NFT_CMP_NEQ, + NFT_CMP_LT, + NFT_CMP_LTE, + NFT_CMP_GT, + NFT_CMP_GTE, +}; + +/** + * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes + * + * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes) + */ +enum nft_cmp_attributes { + NFTA_CMP_UNSPEC, + NFTA_CMP_SREG, + NFTA_CMP_OP, + NFTA_CMP_DATA, + __NFTA_CMP_MAX +}; +#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) + +/** + * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes + * + * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING) + * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) + */ +enum nft_lookup_attributes { + NFTA_LOOKUP_UNSPEC, + NFTA_LOOKUP_SET, + NFTA_LOOKUP_SREG, + NFTA_LOOKUP_DREG, + __NFTA_LOOKUP_MAX +}; +#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) + +/** + * enum nft_payload_bases - nf_tables payload expression offset bases + * + * @NFT_PAYLOAD_LL_HEADER: link layer header + * @NFT_PAYLOAD_NETWORK_HEADER: network header + * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + */ +enum nft_payload_bases { + NFT_PAYLOAD_LL_HEADER, + NFT_PAYLOAD_NETWORK_HEADER, + NFT_PAYLOAD_TRANSPORT_HEADER, +}; + +/** + * enum nft_payload_attributes - nf_tables payload expression netlink attributes + * + * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) + * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + */ +enum nft_payload_attributes { + NFTA_PAYLOAD_UNSPEC, + NFTA_PAYLOAD_DREG, + NFTA_PAYLOAD_BASE, + NFTA_PAYLOAD_OFFSET, + NFTA_PAYLOAD_LEN, + __NFTA_PAYLOAD_MAX +}; +#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) + +/** + * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes + * + * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8) + * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) + * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) + */ +enum nft_exthdr_attributes { + NFTA_EXTHDR_UNSPEC, + NFTA_EXTHDR_DREG, + NFTA_EXTHDR_TYPE, + NFTA_EXTHDR_OFFSET, + NFTA_EXTHDR_LEN, + __NFTA_EXTHDR_MAX +}; +#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) + +/** + * enum nft_meta_keys - nf_tables meta expression keys + * + * @NFT_META_LEN: packet length (skb->len) + * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT + * @NFT_META_PRIORITY: packet priority (skb->priority) + * @NFT_META_MARK: packet mark (skb->mark) + * @NFT_META_IIF: packet input interface index (dev->ifindex) + * @NFT_META_OIF: packet output interface index (dev->ifindex) + * @NFT_META_IIFNAME: packet input interface name (dev->name) + * @NFT_META_OIFNAME: packet output interface name (dev->name) + * @NFT_META_IIFTYPE: packet input interface type (dev->type) + * @NFT_META_OIFTYPE: packet output interface type (dev->type) + * @NFT_META_SKUID: originating socket UID (fsuid) + * @NFT_META_SKGID: originating socket GID (fsgid) + * @NFT_META_NFTRACE: packet nftrace bit + * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_META_SECMARK: packet secmark (skb->secmark) + */ +enum nft_meta_keys { + NFT_META_LEN, + NFT_META_PROTOCOL, + NFT_META_PRIORITY, + NFT_META_MARK, + NFT_META_IIF, + NFT_META_OIF, + NFT_META_IIFNAME, + NFT_META_OIFNAME, + NFT_META_IIFTYPE, + NFT_META_OIFTYPE, + NFT_META_SKUID, + NFT_META_SKGID, + NFT_META_NFTRACE, + NFT_META_RTCLASSID, + NFT_META_SECMARK, +}; + +/** + * enum nft_meta_attributes - nf_tables meta expression netlink attributes + * + * @NFTA_META_DREG: destination register (NLA_U32) + * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + */ +enum nft_meta_attributes { + NFTA_META_UNSPEC, + NFTA_META_DREG, + NFTA_META_KEY, + __NFTA_META_MAX +}; +#define NFTA_META_MAX (__NFTA_META_MAX - 1) + +/** + * enum nft_ct_keys - nf_tables ct expression keys + * + * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) + * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir) + * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status) + * @NFT_CT_MARK: conntrack mark value + * @NFT_CT_SECMARK: conntrack secmark value + * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms + * @NFT_CT_HELPER: connection tracking helper assigned to conntrack + * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address) + * @NFT_CT_PROTOCOL: conntrack layer 4 protocol + * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source + * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + */ +enum nft_ct_keys { + NFT_CT_STATE, + NFT_CT_DIRECTION, + NFT_CT_STATUS, + NFT_CT_MARK, + NFT_CT_SECMARK, + NFT_CT_EXPIRATION, + NFT_CT_HELPER, + NFT_CT_L3PROTOCOL, + NFT_CT_SRC, + NFT_CT_DST, + NFT_CT_PROTOCOL, + NFT_CT_PROTO_SRC, + NFT_CT_PROTO_DST, +}; + +/** + * enum nft_ct_attributes - nf_tables ct expression netlink attributes + * + * @NFTA_CT_DREG: destination register (NLA_U32) + * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys) + * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8) + */ +enum nft_ct_attributes { + NFTA_CT_UNSPEC, + NFTA_CT_DREG, + NFTA_CT_KEY, + NFTA_CT_DIRECTION, + __NFTA_CT_MAX +}; +#define NFTA_CT_MAX (__NFTA_CT_MAX - 1) + +/** + * enum nft_limit_attributes - nf_tables limit expression netlink attributes + * + * @NFTA_LIMIT_RATE: refill rate (NLA_U64) + * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) + */ +enum nft_limit_attributes { + NFTA_LIMIT_UNSPEC, + NFTA_LIMIT_RATE, + NFTA_LIMIT_UNIT, + __NFTA_LIMIT_MAX +}; +#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) + +/** + * enum nft_counter_attributes - nf_tables counter expression netlink attributes + * + * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64) + * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64) + */ +enum nft_counter_attributes { + NFTA_COUNTER_UNSPEC, + NFTA_COUNTER_BYTES, + NFTA_COUNTER_PACKETS, + __NFTA_COUNTER_MAX +}; +#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) + +/** + * enum nft_log_attributes - nf_tables log expression netlink attributes + * + * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32) + * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) + * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) + * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + */ +enum nft_log_attributes { + NFTA_LOG_UNSPEC, + NFTA_LOG_GROUP, + NFTA_LOG_PREFIX, + NFTA_LOG_SNAPLEN, + NFTA_LOG_QTHRESHOLD, + __NFTA_LOG_MAX +}; +#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) + +/** + * enum nft_reject_types - nf_tables reject expression reject types + * + * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable + * @NFT_REJECT_TCP_RST: reject using TCP RST + */ +enum nft_reject_types { + NFT_REJECT_ICMP_UNREACH, + NFT_REJECT_TCP_RST, +}; + +/** + * enum nft_reject_attributes - nf_tables reject expression netlink attributes + * + * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types) + * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8) + */ +enum nft_reject_attributes { + NFTA_REJECT_UNSPEC, + NFTA_REJECT_TYPE, + NFTA_REJECT_ICMP_CODE, + __NFTA_REJECT_MAX +}; +#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1) + +/** + * enum nft_nat_types - nf_tables nat expression NAT types + * + * @NFT_NAT_SNAT: source NAT + * @NFT_NAT_DNAT: destination NAT + */ +enum nft_nat_types { + NFT_NAT_SNAT, + NFT_NAT_DNAT, +}; + +/** + * enum nft_nat_attributes - nf_tables nat expression netlink attributes + * + * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) + * @NFTA_NAT_FAMILY: NAT family (NLA_U32) + * @NFTA_NAT_REG_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + */ +enum nft_nat_attributes { + NFTA_NAT_UNSPEC, + NFTA_NAT_TYPE, + NFTA_NAT_FAMILY, + NFTA_NAT_REG_ADDR_MIN, + NFTA_NAT_REG_ADDR_MAX, + NFTA_NAT_REG_PROTO_MIN, + NFTA_NAT_REG_PROTO_MAX, + __NFTA_NAT_MAX +}; +#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) + +#endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nf_tables_compat.h b/include/uapi/linux/netfilter/nf_tables_compat.h new file mode 100644 index 000000000000..8310f5f76551 --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables_compat.h @@ -0,0 +1,38 @@ +#ifndef _NFT_COMPAT_NFNETLINK_H_ +#define _NFT_COMPAT_NFNETLINK_H_ + +enum nft_target_attributes { + NFTA_TARGET_UNSPEC, + NFTA_TARGET_NAME, + NFTA_TARGET_REV, + NFTA_TARGET_INFO, + __NFTA_TARGET_MAX +}; +#define NFTA_TARGET_MAX (__NFTA_TARGET_MAX - 1) + +enum nft_match_attributes { + NFTA_MATCH_UNSPEC, + NFTA_MATCH_NAME, + NFTA_MATCH_REV, + NFTA_MATCH_INFO, + __NFTA_MATCH_MAX +}; +#define NFTA_MATCH_MAX (__NFTA_MATCH_MAX - 1) + +#define NFT_COMPAT_NAME_MAX 32 + +enum { + NFNL_MSG_COMPAT_GET, + NFNL_MSG_COMPAT_MAX +}; + +enum { + NFTA_COMPAT_UNSPEC = 0, + NFTA_COMPAT_NAME, + NFTA_COMPAT_REV, + NFTA_COMPAT_TYPE, + __NFTA_COMPAT_MAX, +}; +#define NFTA_COMPAT_MAX (__NFTA_COMPAT_MAX - 1) + +#endif diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 4a4efafad5f4..596ddd45253c 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -18,6 +18,8 @@ enum nfnetlink_groups { #define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_DESTROY, #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + NFNLGRP_NFTABLES, +#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) @@ -51,6 +53,12 @@ struct nfgenmsg { #define NFNL_SUBSYS_ACCT 7 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 #define NFNL_SUBSYS_CTHELPER 9 -#define NFNL_SUBSYS_COUNT 10 +#define NFNL_SUBSYS_NFTABLES 10 +#define NFNL_SUBSYS_NFT_COMPAT 11 +#define NFNL_SUBSYS_COUNT 12 + +/* Reserved control nfnetlink messages */ +#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE +#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 #endif /* _UAPI_NFNETLINK_H */ diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h index eb262e3324d2..c50061db6098 100644 --- a/include/xen/interface/io/netif.h +++ b/include/xen/interface/io/netif.h @@ -51,6 +51,20 @@ */ /* + * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum + * offload off or on. If it is missing then the feature is assumed to be on. + * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum + * offload on or off. If it is missing then the feature is assumed to be off. + */ + +/* + * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to + * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither + * frontends nor backends are assumed to be capable unless the flags are + * present. + */ + +/* * This is the 'wire' format for packets: * Request 1: xen_netif_tx_request -- XEN_NETTXF_* (any flags) * [Request 2: xen_netif_extra_info] (only if request 1 has XEN_NETTXF_extra_info) @@ -95,8 +109,10 @@ struct xen_netif_tx_request { #define _XEN_NETIF_EXTRA_FLAG_MORE (0) #define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) -/* GSO types - only TCPv4 currently supported. */ +/* GSO types */ +#define XEN_NETIF_GSO_TYPE_NONE (0) #define XEN_NETIF_GSO_TYPE_TCPV4 (1) +#define XEN_NETIF_GSO_TYPE_TCPV6 (2) /* * This structure needs to fit within both netif_tx_request and diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 8ddbfe66d637..4f4aabbd8eab 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -24,6 +24,7 @@ batman-adv-y += bitarray.o batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o batman-adv-y += debugfs.o batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o +batman-adv-y += fragmentation.o batman-adv-y += gateway_client.o batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o @@ -37,4 +38,3 @@ batman-adv-y += send.o batman-adv-y += soft-interface.o batman-adv-y += sysfs.o batman-adv-y += translation-table.o -batman-adv-y += unicast.o diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 264de88db320..5bb58d7bdd56 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -863,25 +863,25 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct arphdr *arphdr; uint8_t *hw_src, *hw_dst; struct batadv_bla_claim_dst *bla_dst; - uint16_t proto; + __be16 proto; int headlen; unsigned short vid = BATADV_NO_FLAGS; int ret; ethhdr = eth_hdr(skb); - if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { vhdr = (struct vlan_ethhdr *)ethhdr; vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; vid |= BATADV_VLAN_HAS_TAG; - proto = ntohs(vhdr->h_vlan_encapsulated_proto); + proto = vhdr->h_vlan_encapsulated_proto; headlen = sizeof(*vhdr); } else { - proto = ntohs(ethhdr->h_proto); + proto = ethhdr->h_proto; headlen = ETH_HLEN; } - if (proto != ETH_P_ARP) + if (proto != htons(ETH_P_ARP)) return 0; /* not a claim frame */ /* this must be a ARP frame. check if it is a claim. */ @@ -1379,8 +1379,8 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, ethhdr = (struct ethhdr *)(((uint8_t *)skb->data) + hdr_size); - if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { - if (!pskb_may_pull(skb, hdr_size + sizeof(struct vlan_ethhdr))) + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { + if (!pskb_may_pull(skb, hdr_size + VLAN_ETH_HLEN)) return 0; vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index f07ec320dc01..99da41290f82 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -29,7 +29,6 @@ #include "send.h" #include "types.h" #include "translation-table.h" -#include "unicast.h" static void batadv_dat_purge(struct work_struct *work); @@ -592,9 +591,9 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, goto free_orig; tmp_skb = pskb_copy(skb, GFP_ATOMIC); - if (!batadv_unicast_4addr_prepare_skb(bat_priv, tmp_skb, - cand[i].orig_node, - packet_subtype)) { + if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb, + cand[i].orig_node, + packet_subtype)) { kfree_skb(tmp_skb); goto free_neigh; } @@ -990,10 +989,10 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, * that a node not using the 4addr packet format doesn't support it. */ if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) - err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, + err = batadv_send_skb_unicast_4addr(bat_priv, skb_new, BATADV_P_DAT_CACHE_REPLY); else - err = batadv_unicast_send_skb(bat_priv, skb_new); + err = batadv_send_skb_unicast(bat_priv, skb_new); if (!err) { batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c new file mode 100644 index 000000000000..271d321b3a04 --- /dev/null +++ b/net/batman-adv/fragmentation.c @@ -0,0 +1,491 @@ +/* Copyright (C) 2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll <martin@hundeboll.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "main.h" +#include "fragmentation.h" +#include "send.h" +#include "originator.h" +#include "routing.h" +#include "hard-interface.h" +#include "soft-interface.h" + + +/** + * batadv_frag_clear_chain - delete entries in the fragment buffer chain + * @head: head of chain with entries. + * + * Free fragments in the passed hlist. Should be called with appropriate lock. + */ +static void batadv_frag_clear_chain(struct hlist_head *head) +{ + struct batadv_frag_list_entry *entry; + struct hlist_node *node; + + hlist_for_each_entry_safe(entry, node, head, list) { + hlist_del(&entry->list); + kfree_skb(entry->skb); + kfree(entry); + } +} + +/** + * batadv_frag_purge_orig - free fragments associated to an orig + * @orig_node: originator to free fragments from + * @check_cb: optional function to tell if an entry should be purged + */ +void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, + bool (*check_cb)(struct batadv_frag_table_entry *)) +{ + struct batadv_frag_table_entry *chain; + uint8_t i; + + for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { + chain = &orig_node->fragments[i]; + spin_lock_bh(&orig_node->fragments[i].lock); + + if (!check_cb || check_cb(chain)) { + batadv_frag_clear_chain(&orig_node->fragments[i].head); + orig_node->fragments[i].size = 0; + } + + spin_unlock_bh(&orig_node->fragments[i].lock); + } +} + +/** + * batadv_frag_size_limit - maximum possible size of packet to be fragmented + * + * Returns the maximum size of payload that can be fragmented. + */ +static int batadv_frag_size_limit(void) +{ + int limit = BATADV_FRAG_MAX_FRAG_SIZE; + + limit -= sizeof(struct batadv_frag_packet); + limit *= BATADV_FRAG_MAX_FRAGMENTS; + + return limit; +} + +/** + * batadv_frag_init_chain - check and prepare fragment chain for new fragment + * @chain: chain in fragments table to init + * @seqno: sequence number of the received fragment + * + * Make chain ready for a fragment with sequence number "seqno". Delete existing + * entries if they have an "old" sequence number. + * + * Caller must hold chain->lock. + * + * Returns true if chain is empty and caller can just insert the new fragment + * without searching for the right position. + */ +static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, + uint16_t seqno) +{ + if (chain->seqno == seqno) + return false; + + if (!hlist_empty(&chain->head)) + batadv_frag_clear_chain(&chain->head); + + chain->size = 0; + chain->seqno = seqno; + + return true; +} + +/** + * batadv_frag_insert_packet - insert a fragment into a fragment chain + * @orig_node: originator that the fragment was received from + * @skb: skb to insert + * @chain_out: list head to attach complete chains of fragments to + * + * Insert a new fragment into the reverse ordered chain in the right table + * entry. The hash table entry is cleared if "old" fragments exist in it. + * + * Returns true if skb is buffered, false on error. If the chain has all the + * fragments needed to merge the packet, the chain is moved to the passed head + * to avoid locking the chain in the table. + */ +static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, + struct sk_buff *skb, + struct hlist_head *chain_out) +{ + struct batadv_frag_table_entry *chain; + struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr; + struct batadv_frag_packet *frag_packet; + uint8_t bucket; + uint16_t seqno, hdr_size = sizeof(struct batadv_frag_packet); + bool ret = false; + + /* Linearize packet to avoid linearizing 16 packets in a row when doing + * the later merge. Non-linear merge should be added to remove this + * linearization. + */ + if (skb_linearize(skb) < 0) + goto err; + + frag_packet = (struct batadv_frag_packet *)skb->data; + seqno = ntohs(frag_packet->seqno); + bucket = seqno % BATADV_FRAG_BUFFER_COUNT; + + frag_entry_new = kmalloc(sizeof(*frag_entry_new), GFP_ATOMIC); + if (!frag_entry_new) + goto err; + + frag_entry_new->skb = skb; + frag_entry_new->no = frag_packet->no; + + /* Select entry in the "chain table" and delete any prior fragments + * with another sequence number. batadv_frag_init_chain() returns true, + * if the list is empty at return. + */ + chain = &orig_node->fragments[bucket]; + spin_lock_bh(&chain->lock); + if (batadv_frag_init_chain(chain, seqno)) { + hlist_add_head(&frag_entry_new->list, &chain->head); + chain->size = skb->len - hdr_size; + chain->timestamp = jiffies; + ret = true; + goto out; + } + + /* Find the position for the new fragment. */ + hlist_for_each_entry(frag_entry_curr, &chain->head, list) { + /* Drop packet if fragment already exists. */ + if (frag_entry_curr->no == frag_entry_new->no) + goto err_unlock; + + /* Order fragments from highest to lowest. */ + if (frag_entry_curr->no < frag_entry_new->no) { + hlist_add_before(&frag_entry_new->list, + &frag_entry_curr->list); + chain->size += skb->len - hdr_size; + chain->timestamp = jiffies; + ret = true; + goto out; + } + } + + /* Reached the end of the list, so insert after 'frag_entry_curr'. */ + if (likely(frag_entry_curr)) { + hlist_add_after(&frag_entry_curr->list, &frag_entry_new->list); + chain->size += skb->len - hdr_size; + chain->timestamp = jiffies; + ret = true; + } + +out: + if (chain->size > batadv_frag_size_limit() || + ntohs(frag_packet->total_size) > batadv_frag_size_limit()) { + /* Clear chain if total size of either the list or the packet + * exceeds the maximum size of one merged packet. + */ + batadv_frag_clear_chain(&chain->head); + chain->size = 0; + } else if (ntohs(frag_packet->total_size) == chain->size) { + /* All fragments received. Hand over chain to caller. */ + hlist_move_list(&chain->head, chain_out); + chain->size = 0; + } + +err_unlock: + spin_unlock_bh(&chain->lock); + +err: + if (!ret) + kfree(frag_entry_new); + + return ret; +} + +/** + * batadv_frag_merge_packets - merge a chain of fragments + * @chain: head of chain with fragments + * @skb: packet with total size of skb after merging + * + * Expand the first skb in the chain and copy the content of the remaining + * skb's into the expanded one. After doing so, clear the chain. + * + * Returns the merged skb or NULL on error. + */ +static struct sk_buff * +batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb) +{ + struct batadv_frag_packet *packet; + struct batadv_frag_list_entry *entry; + struct sk_buff *skb_out = NULL; + int size, hdr_size = sizeof(struct batadv_frag_packet); + + /* Make sure incoming skb has non-bogus data. */ + packet = (struct batadv_frag_packet *)skb->data; + size = ntohs(packet->total_size); + if (size > batadv_frag_size_limit()) + goto free; + + /* Remove first entry, as this is the destination for the rest of the + * fragments. + */ + entry = hlist_entry(chain->first, struct batadv_frag_list_entry, list); + hlist_del(&entry->list); + skb_out = entry->skb; + kfree(entry); + + /* Make room for the rest of the fragments. */ + if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) { + kfree_skb(skb_out); + skb_out = NULL; + goto free; + } + + /* Move the existing MAC header to just before the payload. (Override + * the fragment header.) + */ + skb_pull_rcsum(skb_out, hdr_size); + memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN); + skb_set_mac_header(skb_out, -ETH_HLEN); + skb_reset_network_header(skb_out); + skb_reset_transport_header(skb_out); + + /* Copy the payload of the each fragment into the last skb */ + hlist_for_each_entry(entry, chain, list) { + size = entry->skb->len - hdr_size; + memcpy(skb_put(skb_out, size), entry->skb->data + hdr_size, + size); + } + +free: + /* Locking is not needed, because 'chain' is not part of any orig. */ + batadv_frag_clear_chain(chain); + return skb_out; +} + +/** + * batadv_frag_skb_buffer - buffer fragment for later merge + * @skb: skb to buffer + * @orig_node_src: originator that the skb is received from + * + * Add fragment to buffer and merge fragments if possible. + * + * There are three possible outcomes: 1) Packet is merged: Return true and + * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb + * to NULL; 3) Error: Return false and leave skb as is. + */ +bool batadv_frag_skb_buffer(struct sk_buff **skb, + struct batadv_orig_node *orig_node_src) +{ + struct sk_buff *skb_out = NULL; + struct hlist_head head = HLIST_HEAD_INIT; + bool ret = false; + + /* Add packet to buffer and table entry if merge is possible. */ + if (!batadv_frag_insert_packet(orig_node_src, *skb, &head)) + goto out_err; + + /* Leave if more fragments are needed to merge. */ + if (hlist_empty(&head)) + goto out; + + skb_out = batadv_frag_merge_packets(&head, *skb); + if (!skb_out) + goto out_err; + +out: + *skb = skb_out; + ret = true; +out_err: + return ret; +} + +/** + * batadv_frag_skb_fwd - forward fragments that would exceed MTU when merged + * @skb: skb to forward + * @recv_if: interface that the skb is received on + * @orig_node_src: originator that the skb is received from + * + * Look up the next-hop of the fragments payload and check if the merged packet + * will exceed the MTU towards the next-hop. If so, the fragment is forwarded + * without merging it. + * + * Returns true if the fragment is consumed/forwarded, false otherwise. + */ +bool batadv_frag_skb_fwd(struct sk_buff *skb, + struct batadv_hard_iface *recv_if, + struct batadv_orig_node *orig_node_src) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_orig_node *orig_node_dst = NULL; + struct batadv_neigh_node *neigh_node = NULL; + struct batadv_frag_packet *packet; + uint16_t total_size; + bool ret = false; + + packet = (struct batadv_frag_packet *)skb->data; + orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest); + if (!orig_node_dst) + goto out; + + neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if); + if (!neigh_node) + goto out; + + /* Forward the fragment, if the merged packet would be too big to + * be assembled. + */ + total_size = ntohs(packet->total_size); + if (total_size > neigh_node->if_incoming->net_dev->mtu) { + batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_FWD); + batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES, + skb->len + ETH_HLEN); + + packet->header.ttl--; + batadv_send_skb_packet(skb, neigh_node->if_incoming, + neigh_node->addr); + ret = true; + } + +out: + if (orig_node_dst) + batadv_orig_node_free_ref(orig_node_dst); + if (neigh_node) + batadv_neigh_node_free_ref(neigh_node); + return ret; +} + +/** + * batadv_frag_create - create a fragment from skb + * @skb: skb to create fragment from + * @frag_head: header to use in new fragment + * @mtu: size of new fragment + * + * Split the passed skb into two fragments: A new one with size matching the + * passed mtu and the old one with the rest. The new skb contains data from the + * tail of the old skb. + * + * Returns the new fragment, NULL on error. + */ +static struct sk_buff *batadv_frag_create(struct sk_buff *skb, + struct batadv_frag_packet *frag_head, + unsigned int mtu) +{ + struct sk_buff *skb_fragment; + unsigned header_size = sizeof(*frag_head); + unsigned fragment_size = mtu - header_size; + + skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); + if (!skb_fragment) + goto err; + + skb->priority = TC_PRIO_CONTROL; + + /* Eat the last mtu-bytes of the skb */ + skb_reserve(skb_fragment, header_size + ETH_HLEN); + skb_split(skb, skb_fragment, skb->len - fragment_size); + + /* Add the header */ + skb_push(skb_fragment, header_size); + memcpy(skb_fragment->data, frag_head, header_size); + +err: + return skb_fragment; +} + +/** + * batadv_frag_send_packet - create up to 16 fragments from the passed skb + * @skb: skb to create fragments from + * @orig_node: final destination of the created fragments + * @neigh_node: next-hop of the created fragments + * + * Returns true on success, false otherwise. + */ +bool batadv_frag_send_packet(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node) +{ + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if; + struct batadv_frag_packet frag_header; + struct sk_buff *skb_fragment; + unsigned mtu = neigh_node->if_incoming->net_dev->mtu; + unsigned header_size = sizeof(frag_header); + unsigned max_fragment_size, max_packet_size; + + /* To avoid merge and refragmentation at next-hops we never send + * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE + */ + mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE); + max_fragment_size = (mtu - header_size - ETH_HLEN); + max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; + + /* Don't even try to fragment, if we need more than 16 fragments */ + if (skb->len > max_packet_size) + goto out_err; + + bat_priv = orig_node->bat_priv; + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out_err; + + /* Create one header to be copied to all fragments */ + frag_header.header.packet_type = BATADV_UNICAST_FRAG; + frag_header.header.version = BATADV_COMPAT_VERSION; + frag_header.header.ttl = BATADV_TTL; + frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno)); + frag_header.reserved = 0; + frag_header.no = 0; + frag_header.total_size = htons(skb->len); + memcpy(frag_header.orig, primary_if->net_dev->dev_addr, ETH_ALEN); + memcpy(frag_header.dest, orig_node->orig, ETH_ALEN); + + /* Eat and send fragments from the tail of skb */ + while (skb->len > max_fragment_size) { + skb_fragment = batadv_frag_create(skb, &frag_header, mtu); + if (!skb_fragment) + goto out_err; + + batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); + batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, + skb_fragment->len + ETH_HLEN); + batadv_send_skb_packet(skb_fragment, neigh_node->if_incoming, + neigh_node->addr); + frag_header.no++; + + /* The initial check in this function should cover this case */ + if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) + goto out_err; + } + + /* Make room for the fragment header. */ + if (batadv_skb_head_push(skb, header_size) < 0 || + pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) + goto out_err; + + memcpy(skb->data, &frag_header, header_size); + + /* Send the last fragment */ + batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); + batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, + skb->len + ETH_HLEN); + batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + + return true; +out_err: + return false; +} diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h new file mode 100644 index 000000000000..ca029e2676e7 --- /dev/null +++ b/net/batman-adv/fragmentation.h @@ -0,0 +1,50 @@ +/* Copyright (C) 2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll <martin@hundeboll.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_ +#define _NET_BATMAN_ADV_FRAGMENTATION_H_ + +void batadv_frag_purge_orig(struct batadv_orig_node *orig, + bool (*check_cb)(struct batadv_frag_table_entry *)); +bool batadv_frag_skb_fwd(struct sk_buff *skb, + struct batadv_hard_iface *recv_if, + struct batadv_orig_node *orig_node_src); +bool batadv_frag_skb_buffer(struct sk_buff **skb, + struct batadv_orig_node *orig_node); +bool batadv_frag_send_packet(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node); + +/** + * batadv_frag_check_entry - check if a list of fragments has timed out + * @frags_entry: table entry to check + * + * Returns true if the frags entry has timed out, false otherwise. + */ +static inline bool +batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) +{ + if (!hlist_empty(&frags_entry->head) && + batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT)) + return true; + else + return false; +} + +#endif /* _NET_BATMAN_ADV_FRAGMENTATION_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 1bce63aa5f5f..053bb318c7a7 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -655,24 +655,29 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) struct iphdr *iphdr; struct ipv6hdr *ipv6hdr; struct udphdr *udphdr; + struct vlan_ethhdr *vhdr; + __be16 proto; /* check for ethernet header */ if (!pskb_may_pull(skb, *header_len + ETH_HLEN)) return false; ethhdr = (struct ethhdr *)skb->data; + proto = ethhdr->h_proto; *header_len += ETH_HLEN; /* check for initial vlan header */ - if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { + if (proto == htons(ETH_P_8021Q)) { if (!pskb_may_pull(skb, *header_len + VLAN_HLEN)) return false; - ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN); + + vhdr = (struct vlan_ethhdr *)skb->data; + proto = vhdr->h_vlan_encapsulated_proto; *header_len += VLAN_HLEN; } /* check for ip header */ - switch (ntohs(ethhdr->h_proto)) { - case ETH_P_IP: + switch (proto) { + case htons(ETH_P_IP): if (!pskb_may_pull(skb, *header_len + sizeof(*iphdr))) return false; iphdr = (struct iphdr *)(skb->data + *header_len); @@ -683,7 +688,7 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) return false; break; - case ETH_P_IPV6: + case htons(ETH_P_IPV6): if (!pskb_may_pull(skb, *header_len + sizeof(*ipv6hdr))) return false; ipv6hdr = (struct ipv6hdr *)(skb->data + *header_len); @@ -710,12 +715,12 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) *header_len += sizeof(*udphdr); /* check for bootp port */ - if ((ntohs(ethhdr->h_proto) == ETH_P_IP) && - (ntohs(udphdr->dest) != 67)) + if ((proto == htons(ETH_P_IP)) && + (udphdr->dest != htons(67))) return false; - if ((ntohs(ethhdr->h_proto) == ETH_P_IPV6) && - (ntohs(udphdr->dest) != 547)) + if ((proto == htons(ETH_P_IPV6)) && + (udphdr->dest != htons(547))) return false; return true; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index eeb667112d64..d564af295db4 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -269,9 +269,10 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) const struct batadv_priv *bat_priv = netdev_priv(soft_iface); const struct batadv_hard_iface *hard_iface; /* allow big frames if all devices are capable to do so - * (have MTU > 1500 + BAT_HEADER_LEN) + * (have MTU > 1500 + batadv_max_header_len()) */ int min_mtu = ETH_DATA_LEN; + int max_header_len = batadv_max_header_len(); if (atomic_read(&bat_priv->fragmentation)) goto out; @@ -285,8 +286,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) if (hard_iface->soft_iface != soft_iface) continue; - min_mtu = min_t(int, - hard_iface->net_dev->mtu - BATADV_HEADER_LEN, + min_mtu = min_t(int, hard_iface->net_dev->mtu - max_header_len, min_mtu); } rcu_read_unlock(); @@ -379,7 +379,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv; struct net_device *soft_iface, *master; - __be16 ethertype = __constant_htons(ETH_P_BATMAN); + __be16 ethertype = htons(ETH_P_BATMAN); + int max_header_len = batadv_max_header_len(); int ret; if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) @@ -444,23 +445,22 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; dev_add_pack(&hard_iface->batman_adv_ptype); - atomic_set(&hard_iface->frag_seqno, 1); batadv_info(hard_iface->soft_iface, "Adding interface: %s\n", hard_iface->net_dev->name); if (atomic_read(&bat_priv->fragmentation) && - hard_iface->net_dev->mtu < ETH_DATA_LEN + BATADV_HEADER_LEN) + hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len) batadv_info(hard_iface->soft_iface, - "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %zi would solve the problem.\n", + "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n", hard_iface->net_dev->name, hard_iface->net_dev->mtu, - ETH_DATA_LEN + BATADV_HEADER_LEN); + ETH_DATA_LEN + max_header_len); if (!atomic_read(&bat_priv->fragmentation) && - hard_iface->net_dev->mtu < ETH_DATA_LEN + BATADV_HEADER_LEN) + hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len) batadv_info(hard_iface->soft_iface, - "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %zi.\n", + "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n", hard_iface->net_dev->name, hard_iface->net_dev->mtu, - ETH_DATA_LEN + BATADV_HEADER_LEN); + ETH_DATA_LEN + max_header_len); if (batadv_hardif_is_iface_up(hard_iface)) batadv_hardif_activate_interface(hard_iface); diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 5a99bb4b6b82..82ac6472fa6f 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -192,25 +192,25 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto free_skb; } - if (icmp_packet->header.packet_type != BATADV_ICMP) { + if (icmp_packet->icmph.header.packet_type != BATADV_ICMP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n"); len = -EINVAL; goto free_skb; } - if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { + if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Error - can't send packet from char device: got bogus message type (expected: ECHO_REQUEST)\n"); len = -EINVAL; goto free_skb; } - icmp_packet->uid = socket_client->index; + icmp_packet->icmph.uid = socket_client->index; - if (icmp_packet->header.version != BATADV_COMPAT_VERSION) { - icmp_packet->msg_type = BATADV_PARAMETER_PROBLEM; - icmp_packet->header.version = BATADV_COMPAT_VERSION; + if (icmp_packet->icmph.header.version != BATADV_COMPAT_VERSION) { + icmp_packet->icmph.msg_type = BATADV_PARAMETER_PROBLEM; + icmp_packet->icmph.header.version = BATADV_COMPAT_VERSION; batadv_socket_add_packet(socket_client, icmp_packet, packet_len); goto free_skb; @@ -219,7 +219,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dst_unreach; - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->dst); + orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst); if (!orig_node) goto dst_unreach; @@ -233,7 +233,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE) goto dst_unreach; - memcpy(icmp_packet->orig, + memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, ETH_ALEN); if (packet_len == sizeof(struct batadv_icmp_packet_rr)) @@ -244,7 +244,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto out; dst_unreach: - icmp_packet->msg_type = BATADV_DESTINATION_UNREACHABLE; + icmp_packet->icmph.msg_type = BATADV_DESTINATION_UNREACHABLE; batadv_socket_add_packet(socket_client, icmp_packet, packet_len); free_skb: kfree_skb(skb); @@ -318,7 +318,7 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client, /* while waiting for the lock the socket_client could have been * deleted */ - if (!batadv_socket_client_hash[icmp_packet->uid]) { + if (!batadv_socket_client_hash[icmp_packet->icmph.uid]) { spin_unlock_bh(&socket_client->lock); kfree(socket_packet); return; @@ -347,7 +347,7 @@ void batadv_socket_receive_packet(struct batadv_icmp_packet_rr *icmp_packet, { struct batadv_socket_client *hash; - hash = batadv_socket_client_hash[icmp_packet->uid]; + hash = batadv_socket_client_hash[icmp_packet->icmph.uid]; if (hash) batadv_socket_add_packet(hash, icmp_packet, icmp_len); } diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 8b195e63e70e..7f3a5c426615 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -36,11 +36,11 @@ #include "gateway_client.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" -#include "unicast.h" #include "gateway_common.h" #include "hash.h" #include "bat_algo.h" #include "network-coding.h" +#include "fragmentation.h" /* List manipulations on hardif_list have to be rtnl_lock()'ed, @@ -256,6 +256,31 @@ out: } /** + * batadv_max_header_len - calculate maximum encapsulation overhead for a + * payload packet + * + * Return the maximum encapsulation overhead in bytes. + */ +int batadv_max_header_len(void) +{ + int header_len = 0; + + header_len = max_t(int, header_len, + sizeof(struct batadv_unicast_packet)); + header_len = max_t(int, header_len, + sizeof(struct batadv_unicast_4addr_packet)); + header_len = max_t(int, header_len, + sizeof(struct batadv_bcast_packet)); + +#ifdef CONFIG_BATMAN_ADV_NC + header_len = max_t(int, header_len, + sizeof(struct batadv_coded_packet)); +#endif + + return header_len; +} + +/** * batadv_skb_set_priority - sets skb priority according to packet content * @skb: the packet to be sent * @offset: offset to the packet content @@ -399,10 +424,10 @@ static void batadv_recv_handler_init(void) /* compile time checks for struct member offsets */ BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10); BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4); - BUILD_BUG_ON(offsetof(struct batadv_unicast_frag_packet, dest) != 4); BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, icmph.dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, icmph.dst) != 4); /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; @@ -412,12 +437,12 @@ static void batadv_recv_handler_init(void) batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet; /* unicast packet */ batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet; - /* fragmented unicast packet */ - batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_ucast_frag_packet; /* unicast tvlv packet */ batadv_rx_handler[BATADV_UNICAST_TVLV] = batadv_recv_unicast_tvlv; /* batman icmp packet */ batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet; + /* Fragmented packets */ + batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_frag_packet; } int diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index e11c2ec7a739..54c13d51edbe 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -131,6 +131,15 @@ enum batadv_uev_type { #define BATADV_GW_THRESHOLD 50 +/* Number of fragment chains for each orig_node */ +#define BATADV_FRAG_BUFFER_COUNT 8 +/* Maximum number of fragments for one packet */ +#define BATADV_FRAG_MAX_FRAGMENTS 16 +/* Maxumim size of each fragment */ +#define BATADV_FRAG_MAX_FRAG_SIZE 1400 +/* Time to keep fragments while waiting for rest of the fragments */ +#define BATADV_FRAG_TIMEOUT 10000 + #define BATADV_DAT_CANDIDATE_NOT_FOUND 0 #define BATADV_DAT_CANDIDATE_ORIG 1 @@ -182,6 +191,7 @@ void batadv_mesh_free(struct net_device *soft_iface); int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq); +int batadv_max_header_len(void); void batadv_skb_set_priority(struct sk_buff *skb, int offset); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 5d53d2f38377..a591dc5c321e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -25,10 +25,10 @@ #include "routing.h" #include "gateway_client.h" #include "hard-interface.h" -#include "unicast.h" #include "soft-interface.h" #include "bridge_loop_avoidance.h" #include "network-coding.h" +#include "fragmentation.h" /* hash class keys */ static struct lock_class_key batadv_orig_hash_lock_class_key; @@ -146,7 +146,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) /* Free nc_nodes */ batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); - batadv_frag_list_free(&orig_node->frag_list); + batadv_frag_purge_orig(orig_node, NULL); + batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, "originator timed out"); @@ -217,7 +218,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, const uint8_t *addr) { struct batadv_orig_node *orig_node; - int size; + int size, i; int hash_added; unsigned long reset_time; @@ -269,8 +270,11 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, size = bat_priv->num_ifaces * sizeof(uint8_t); orig_node->bcast_own_sum = kzalloc(size, GFP_ATOMIC); - INIT_LIST_HEAD(&orig_node->frag_list); - orig_node->last_frag_packet = 0; + for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { + INIT_HLIST_HEAD(&orig_node->fragments[i].head); + spin_lock_init(&orig_node->fragments[i].lock); + orig_node->fragments[i].size = 0; + } if (!orig_node->bcast_own_sum) goto free_bcast_own; @@ -394,9 +398,8 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) continue; } - if (batadv_has_timed_out(orig_node->last_frag_packet, - BATADV_FRAG_TIMEOUT)) - batadv_frag_list_free(&orig_node->frag_list); + batadv_frag_purge_orig(orig_node, + batadv_frag_check_entry); } spin_unlock_bh(list_lock); } diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 4361bae6186a..65e723ed030b 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -91,12 +91,6 @@ enum batadv_icmp_packettype { BATADV_PARAMETER_PROBLEM = 12, }; -/* fragmentation defines */ -enum batadv_unicast_frag_flags { - BATADV_UNI_FRAG_HEAD = BIT(0), - BATADV_UNI_FRAG_LARGETAIL = BIT(1), -}; - /* tt data subtypes */ #define BATADV_TT_DATA_TYPE_MASK 0x0F @@ -192,29 +186,47 @@ struct batadv_ogm_packet { #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) -struct batadv_icmp_packet { +/** + * batadv_icmp_header - common ICMP header + * @header: common batman header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier + */ +struct batadv_icmp_header { struct batadv_header header; uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[ETH_ALEN]; uint8_t orig[ETH_ALEN]; - __be16 seqno; uint8_t uid; +}; + +/** + * batadv_icmp_packet - ICMP packet + * @icmph: common ICMP header + * @reserved: not used - useful for alignment + * @seqno: ICMP sequence number + */ +struct batadv_icmp_packet { + struct batadv_icmp_header icmph; uint8_t reserved; + __be16 seqno; }; #define BATADV_RR_LEN 16 -/* icmp_packet_rr must start with all fields from imcp_packet - * as this is assumed by code that handles ICMP packets +/** + * batadv_icmp_packet_rr - ICMP RouteRecord packet + * @icmph: common ICMP header + * @rr_cur: number of entries the rr array + * @seqno: ICMP sequence number + * @rr: route record array */ struct batadv_icmp_packet_rr { - struct batadv_header header; - uint8_t msg_type; /* see ICMP message types above */ - uint8_t dst[ETH_ALEN]; - uint8_t orig[ETH_ALEN]; - __be16 seqno; - uint8_t uid; + struct batadv_icmp_header icmph; uint8_t rr_cur; + __be16 seqno; uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; }; @@ -255,15 +267,32 @@ struct batadv_unicast_4addr_packet { */ }; -struct batadv_unicast_frag_packet { - struct batadv_header header; - uint8_t ttvn; /* destination translation table version number */ - uint8_t dest[ETH_ALEN]; - uint8_t flags; - uint8_t align; - uint8_t orig[ETH_ALEN]; - __be16 seqno; -} __packed; +/** + * struct batadv_frag_packet - fragmented packet + * @header: common batman packet header with type, compatversion, and ttl + * @dest: final destination used when routing fragments + * @orig: originator of the fragment used when merging the packet + * @no: fragment number within this sequence + * @reserved: reserved byte for alignment + * @seqno: sequence identification + * @total_size: size of the merged packet + */ +struct batadv_frag_packet { + struct batadv_header header; +#if defined(__BIG_ENDIAN_BITFIELD) + uint8_t no:4; + uint8_t reserved:4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + uint8_t reserved:4; + uint8_t no:4; +#else +#error "unknown bitfield endianess" +#endif + uint8_t dest[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + __be16 seqno; + __be16 total_size; +}; struct batadv_bcast_packet { struct batadv_header header; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 457dfef9c5fc..3281a504c20a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -25,10 +25,10 @@ #include "icmp_socket.h" #include "translation-table.h" #include "originator.h" -#include "unicast.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "network-coding.h" +#include "fragmentation.h" static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -258,7 +258,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; /* add data to device queue */ - if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { + if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { batadv_socket_receive_packet(icmp_packet, icmp_len); goto out; } @@ -269,7 +269,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, /* answer echo request (ping) */ /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig); + orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig); if (!orig_node) goto out; @@ -279,10 +279,11 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; - memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); - memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->msg_type = BATADV_ECHO_REPLY; - icmp_packet->header.ttl = BATADV_TTL; + memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN); + memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, + ETH_ALEN); + icmp_packet->icmph.msg_type = BATADV_ECHO_REPLY; + icmp_packet->icmph.header.ttl = BATADV_TTL; if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; @@ -306,9 +307,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; /* send TTL exceeded if packet is an echo request (traceroute) */ - if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { + if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { pr_debug("Warning - can't forward icmp packet from %pM to %pM: ttl exceeded\n", - icmp_packet->orig, icmp_packet->dst); + icmp_packet->icmph.orig, icmp_packet->icmph.dst); goto out; } @@ -317,7 +318,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, goto out; /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig); + orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig); if (!orig_node) goto out; @@ -327,10 +328,11 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; - memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); - memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->msg_type = BATADV_TTL_EXCEEDED; - icmp_packet->header.ttl = BATADV_TTL; + memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN); + memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, + ETH_ALEN); + icmp_packet->icmph.msg_type = BATADV_TTL_EXCEEDED; + icmp_packet->icmph.header.ttl = BATADV_TTL; if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; @@ -379,7 +381,9 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; /* add record route information if not full */ - if ((hdr_size == sizeof(struct batadv_icmp_packet_rr)) && + if ((icmp_packet->icmph.msg_type == BATADV_ECHO_REPLY || + icmp_packet->icmph.msg_type == BATADV_ECHO_REQUEST) && + (hdr_size == sizeof(struct batadv_icmp_packet_rr)) && (icmp_packet->rr_cur < BATADV_RR_LEN)) { memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]), ethhdr->h_dest, ETH_ALEN); @@ -387,15 +391,15 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, } /* packet for me */ - if (batadv_is_my_mac(bat_priv, icmp_packet->dst)) + if (batadv_is_my_mac(bat_priv, icmp_packet->icmph.dst)) return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); /* TTL exceeded */ - if (icmp_packet->header.ttl < 2) + if (icmp_packet->icmph.header.ttl < 2) return batadv_recv_icmp_ttl_exceeded(bat_priv, skb); /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->dst); + orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst); if (!orig_node) goto out; @@ -406,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; /* decrement ttl */ - icmp_packet->header.ttl--; + icmp_packet->icmph.header.ttl--; /* route it */ if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) @@ -651,11 +655,9 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_orig_node *orig_node = NULL; - struct batadv_neigh_node *neigh_node = NULL; struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr = eth_hdr(skb); int res, hdr_len, ret = NET_RX_DROP; - struct sk_buff *new_skb; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -672,46 +674,12 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, if (!orig_node) goto out; - /* find_router() increases neigh_nodes refcount if found. */ - neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); - - if (!neigh_node) - goto out; - /* create a copy of the skb, if needed, to modify it. */ if (skb_cow(skb, ETH_HLEN) < 0) goto out; - unicast_packet = (struct batadv_unicast_packet *)skb->data; - - if (unicast_packet->header.packet_type == BATADV_UNICAST && - atomic_read(&bat_priv->fragmentation) && - skb->len > neigh_node->if_incoming->net_dev->mtu) { - ret = batadv_frag_send_skb(skb, bat_priv, - neigh_node->if_incoming, - neigh_node->addr); - goto out; - } - - if (unicast_packet->header.packet_type == BATADV_UNICAST_FRAG && - batadv_frag_can_reassemble(skb, - neigh_node->if_incoming->net_dev->mtu)) { - ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); - - if (ret == NET_RX_DROP) - goto out; - - /* packet was buffered for late merge */ - if (!new_skb) { - ret = NET_RX_SUCCESS; - goto out; - } - - skb = new_skb; - unicast_packet = (struct batadv_unicast_packet *)skb->data; - } - /* decrement ttl */ + unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_packet->header.ttl--; switch (unicast_packet->header.packet_type) { @@ -746,8 +714,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, } out: - if (neigh_node) - batadv_neigh_node_free_ref(neigh_node); if (orig_node) batadv_orig_node_free_ref(orig_node); return ret; @@ -1001,51 +967,6 @@ rx_success: return batadv_route_unicast_packet(skb, recv_if); } -int batadv_recv_ucast_frag_packet(struct sk_buff *skb, - struct batadv_hard_iface *recv_if) -{ - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_unicast_frag_packet *unicast_packet; - int hdr_size = sizeof(*unicast_packet); - struct sk_buff *new_skb = NULL; - int ret; - - if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) - return NET_RX_DROP; - - if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) - return NET_RX_DROP; - - unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; - - /* packet for me */ - if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { - ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); - - if (ret == NET_RX_DROP) - return NET_RX_DROP; - - /* packet was buffered for late merge */ - if (!new_skb) - return NET_RX_SUCCESS; - - if (batadv_dat_snoop_incoming_arp_request(bat_priv, new_skb, - hdr_size)) - goto rx_success; - if (batadv_dat_snoop_incoming_arp_reply(bat_priv, new_skb, - hdr_size)) - goto rx_success; - - batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if, - sizeof(struct batadv_unicast_packet), NULL); - -rx_success: - return NET_RX_SUCCESS; - } - - return batadv_route_unicast_packet(skb, recv_if); -} - /** * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets * @skb: unicast tvlv packet to process @@ -1095,6 +1016,64 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, return ret; } +/** + * batadv_recv_frag_packet - process received fragment + * @skb: the received fragment + * @recv_if: interface that the skb is received on + * + * This function does one of the three following things: 1) Forward fragment, if + * the assembled packet will exceed our MTU; 2) Buffer fragment, if we till + * lack further fragments; 3) Merge fragments, if we have all needed parts. + * + * Return NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. + */ +int batadv_recv_frag_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_orig_node *orig_node_src = NULL; + struct batadv_frag_packet *frag_packet; + int ret = NET_RX_DROP; + + if (batadv_check_unicast_packet(bat_priv, skb, + sizeof(*frag_packet)) < 0) + goto out; + + frag_packet = (struct batadv_frag_packet *)skb->data; + orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig); + if (!orig_node_src) + goto out; + + /* Route the fragment if it is not for us and too big to be merged. */ + if (!batadv_is_my_mac(bat_priv, frag_packet->dest) && + batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) { + ret = NET_RX_SUCCESS; + goto out; + } + + batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX); + batadv_add_counter(bat_priv, BATADV_CNT_FRAG_RX_BYTES, skb->len); + + /* Add fragment to buffer and merge if possible. */ + if (!batadv_frag_skb_buffer(&skb, orig_node_src)) + goto out; + + /* Deliver merged packet to the appropriate handler, if it was + * merged + */ + if (skb) + batadv_batman_skb_recv(skb, recv_if->net_dev, + &recv_if->batman_adv_ptype, NULL); + + ret = NET_RX_SUCCESS; + +out: + if (orig_node_src) + batadv_orig_node_free_ref(orig_node_src); + + return ret; +} + int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index ea15fa6302ad..55d637a90621 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -30,8 +30,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); -int batadv_recv_ucast_frag_packet(struct sk_buff *skb, - struct batadv_hard_iface *recv_if); +int batadv_recv_frag_packet(struct sk_buff *skb, + struct batadv_hard_iface *iface); int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); int batadv_recv_tt_query(struct sk_buff *skb, diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 81d69fb97c17..82588e425641 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -25,10 +25,10 @@ #include "soft-interface.h" #include "hard-interface.h" #include "gateway_common.h" +#include "gateway_client.h" #include "originator.h" #include "network-coding.h" - -#include <linux/if_ether.h> +#include "fragmentation.h" static void batadv_send_outstanding_bcast_packet(struct work_struct *work); @@ -63,10 +63,10 @@ int batadv_send_skb_packet(struct sk_buff *skb, ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); - ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); + ethhdr->h_proto = htons(ETH_P_BATMAN); skb_set_network_header(skb, ETH_HLEN); - skb->protocol = __constant_htons(ETH_P_BATMAN); + skb->protocol = htons(ETH_P_BATMAN); skb->dev = hard_iface->net_dev; @@ -108,7 +108,19 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); if (!neigh_node) - return ret; + goto out; + + /* Check if the skb is too large to send in one piece and fragment + * it if needed. + */ + if (atomic_read(&bat_priv->fragmentation) && + skb->len > neigh_node->if_incoming->net_dev->mtu) { + /* Fragment and send packet. */ + if (batadv_frag_send_packet(skb, orig_node, neigh_node)) + ret = NET_XMIT_SUCCESS; + + goto out; + } /* try to network code the packet, if it is received on an interface * (i.e. being forwarded). If the packet originates from this node or if @@ -122,8 +134,170 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, ret = NET_XMIT_SUCCESS; } - batadv_neigh_node_free_ref(neigh_node); +out: + if (neigh_node) + batadv_neigh_node_free_ref(neigh_node); + + return ret; +} +/** + * batadv_send_skb_push_fill_unicast - extend the buffer and initialize the + * common fields for unicast packets + * @skb: the skb carrying the unicast header to initialize + * @hdr_size: amount of bytes to push at the beginning of the skb + * @orig_node: the destination node + * + * Returns false if the buffer extension was not possible or true otherwise. + */ +static bool +batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, + struct batadv_orig_node *orig_node) +{ + struct batadv_unicast_packet *unicast_packet; + uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + + if (batadv_skb_head_push(skb, hdr_size) < 0) + return false; + + unicast_packet = (struct batadv_unicast_packet *)skb->data; + unicast_packet->header.version = BATADV_COMPAT_VERSION; + /* batman packet type: unicast */ + unicast_packet->header.packet_type = BATADV_UNICAST; + /* set unicast ttl */ + unicast_packet->header.ttl = BATADV_TTL; + /* copy the destination for faster routing */ + memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); + /* set the destination tt version number */ + unicast_packet->ttvn = ttvn; + + return true; +} + +/** + * batadv_send_skb_prepare_unicast - encapsulate an skb with a unicast header + * @skb: the skb containing the payload to encapsulate + * @orig_node: the destination node + * + * Returns false if the payload could not be encapsulated or true otherwise. + */ +static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, + struct batadv_orig_node *orig_node) +{ + size_t uni_size = sizeof(struct batadv_unicast_packet); + + return batadv_send_skb_push_fill_unicast(skb, uni_size, orig_node); +} + +/** + * batadv_send_skb_prepare_unicast_4addr - encapsulate an skb with a + * unicast 4addr header + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb containing the payload to encapsulate + * @orig_node: the destination node + * @packet_subtype: the unicast 4addr packet subtype to use + * + * Returns false if the payload could not be encapsulated or true otherwise. + */ +bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct batadv_orig_node *orig, + int packet_subtype) +{ + struct batadv_hard_iface *primary_if; + struct batadv_unicast_4addr_packet *uc_4addr_packet; + bool ret = false; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* Pull the header space and fill the unicast_packet substructure. + * We can do that because the first member of the uc_4addr_packet + * is of type struct unicast_packet + */ + if (!batadv_send_skb_push_fill_unicast(skb, sizeof(*uc_4addr_packet), + orig)) + goto out; + + uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + uc_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR; + memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); + uc_4addr_packet->subtype = packet_subtype; + uc_4addr_packet->reserved = 0; + + ret = true; +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return ret; +} + +/** + * batadv_send_generic_unicast_skb - send an skb as unicast + * @bat_priv: the bat priv with all the soft interface information + * @skb: payload to send + * @packet_type: the batman unicast packet type to use + * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast + * 4addr packets) + * + * Returns 1 in case of error or 0 otherwise. + */ +int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype) +{ + struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + struct batadv_unicast_packet *unicast_packet; + struct batadv_orig_node *orig_node; + int ret = NET_RX_DROP; + + /* get routing information */ + if (is_multicast_ether_addr(ethhdr->h_dest)) + orig_node = batadv_gw_get_selected_orig(bat_priv); + else + /* check for tt host - increases orig_node refcount. + * returns NULL in case of AP isolation + */ + orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest); + + if (!orig_node) + goto out; + + switch (packet_type) { + case BATADV_UNICAST: + batadv_send_skb_prepare_unicast(skb, orig_node); + break; + case BATADV_UNICAST_4ADDR: + batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, orig_node, + packet_subtype); + break; + default: + /* this function supports UNICAST and UNICAST_4ADDR only. It + * should never be invoked with any other packet type + */ + goto out; + } + + unicast_packet = (struct batadv_unicast_packet *)skb->data; + + /* inform the destination node that we are still missing a correct route + * for this client. The destination will receive this packet and will + * try to reroute it because the ttvn contained in the header is less + * than the current one + */ + if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest)) + unicast_packet->ttvn = unicast_packet->ttvn - 1; + + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) + ret = 0; + +out: + if (orig_node) + batadv_orig_node_free_ref(orig_node); + if (ret == NET_RX_DROP) + kfree_skb(skb); return ret; } diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index e7b17880fca4..ad63184a4dd9 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -34,5 +34,45 @@ void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work); void batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface); +bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct batadv_orig_node *orig_node, + int packet_subtype); +int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype); + + +/** + * batadv_send_unicast_skb - send the skb encapsulated in a unicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the payload to send + * + * Returns 1 in case of error or 0 otherwise. + */ +static inline int batadv_send_skb_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_send_skb_generic_unicast(bat_priv, skb, BATADV_UNICAST, + 0); +} + +/** + * batadv_send_4addr_unicast_skb - send the skb encapsulated in a unicast 4addr + * packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the payload to send + * @packet_subtype: the unicast 4addr packet subtype to use + * + * Returns 1 in case of error or 0 otherwise. + */ +static inline int batadv_send_skb_unicast_4addr(struct batadv_priv *bat_priv, + struct sk_buff *skb, + int packet_subtype) +{ + return batadv_send_skb_generic_unicast(bat_priv, skb, + BATADV_UNICAST_4ADDR, + packet_subtype); +} #endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 25e6004e8e01..e8a2bd699d40 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -34,8 +34,6 @@ #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/if_vlan.h> -#include <linux/if_ether.h> -#include "unicast.h" #include "bridge_loop_avoidance.h" #include "network-coding.h" @@ -139,6 +137,18 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) return 0; } +/** + * batadv_interface_set_rx_mode - set the rx mode of a device + * @dev: registered network device to modify + * + * We do not actually need to set any rx filters for the virtual batman + * soft interface. However a dummy handler enables a user to set static + * multicast listeners for instance. + */ +static void batadv_interface_set_rx_mode(struct net_device *dev) +{ +} + static int batadv_interface_tx(struct sk_buff *skb, struct net_device *soft_iface) { @@ -147,7 +157,7 @@ static int batadv_interface_tx(struct sk_buff *skb, struct batadv_hard_iface *primary_if = NULL; struct batadv_bcast_packet *bcast_packet; struct vlan_ethhdr *vhdr; - __be16 ethertype = __constant_htons(ETH_P_BATMAN); + __be16 ethertype = htons(ETH_P_BATMAN); static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x00}; static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, @@ -286,7 +296,7 @@ static int batadv_interface_tx(struct sk_buff *skb, batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); - ret = batadv_unicast_send_skb(bat_priv, skb); + ret = batadv_send_skb_unicast(bat_priv, skb); if (ret != 0) goto dropped_freed; } @@ -314,7 +324,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct vlan_ethhdr *vhdr; struct batadv_header *batadv_header = (struct batadv_header *)skb->data; unsigned short vid __maybe_unused = BATADV_NO_FLAGS; - __be16 ethertype = __constant_htons(ETH_P_BATMAN); + __be16 ethertype = htons(ETH_P_BATMAN); bool is_bcast; is_bcast = (batadv_header->packet_type == BATADV_BCAST); @@ -444,6 +454,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work) static int batadv_softif_init_late(struct net_device *dev) { struct batadv_priv *bat_priv; + uint32_t random_seqno; int ret; size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM; @@ -493,6 +504,10 @@ static int batadv_softif_init_late(struct net_device *dev) bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; + /* randomize initial seqno to avoid collision */ + get_random_bytes(&random_seqno, sizeof(random_seqno)); + atomic_set(&bat_priv->frag_seqno, random_seqno); + bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; @@ -580,6 +595,7 @@ static const struct net_device_ops batadv_netdev_ops = { .ndo_get_stats = batadv_interface_stats, .ndo_set_mac_address = batadv_interface_set_mac_addr, .ndo_change_mtu = batadv_interface_change_mtu, + .ndo_set_rx_mode = batadv_interface_set_rx_mode, .ndo_start_xmit = batadv_interface_tx, .ndo_validate_addr = eth_validate_addr, .ndo_add_slave = batadv_softif_slave_add, @@ -623,7 +639,7 @@ static void batadv_softif_init_early(struct net_device *dev) */ dev->mtu = ETH_DATA_LEN; /* reserve more space in the skbuff for our header */ - dev->hard_header_len = BATADV_HEADER_LEN; + dev->hard_header_len = batadv_max_header_len(); /* generate random address */ eth_hw_addr_random(dev); @@ -760,6 +776,12 @@ static const struct { { "mgmt_tx_bytes" }, { "mgmt_rx" }, { "mgmt_rx_bytes" }, + { "frag_tx" }, + { "frag_tx_bytes" }, + { "frag_rx" }, + { "frag_rx_bytes" }, + { "frag_fwd" }, + { "frag_fwd_bytes" }, { "tt_request_tx" }, { "tt_request_rx" }, { "tt_response_tx" }, diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index c7416947a4e0..b521afb186d4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -117,25 +117,17 @@ batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry) kfree_rcu(tt_local_entry, common.rcu); } -static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_common_entry *tt_common_entry; - struct batadv_tt_global_entry *tt_global_entry; - - tt_common_entry = container_of(rcu, struct batadv_tt_common_entry, rcu); - tt_global_entry = container_of(tt_common_entry, - struct batadv_tt_global_entry, common); - - kfree(tt_global_entry); -} - +/** + * batadv_tt_global_entry_free_ref - decrement the refcounter for a + * tt_global_entry and possibly free it + * @tt_global_entry: the object to free + */ static void batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry) { if (atomic_dec_and_test(&tt_global_entry->common.refcount)) { batadv_tt_global_del_orig_list(tt_global_entry); - call_rcu(&tt_global_entry->common.rcu, - batadv_tt_global_entry_free_rcu); + kfree_rcu(tt_global_entry, common.rcu); } } @@ -240,6 +232,17 @@ static int batadv_tt_len(int changes_num) return changes_num * sizeof(struct batadv_tvlv_tt_change); } +/** + * batadv_tt_entries - compute the number of entries fitting in tt_len bytes + * @tt_len: available space + * + * Returns the number of entries. + */ +static uint16_t batadv_tt_entries(uint16_t tt_len) +{ + return tt_len / batadv_tt_len(1); +} + static int batadv_tt_local_init(struct batadv_priv *bat_priv) { if (bat_priv->tt.local_hash) @@ -414,7 +417,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) if (tt_diff_len == 0) goto container_register; - tt_diff_entries_num = tt_diff_len / batadv_tt_len(1); + tt_diff_entries_num = batadv_tt_entries(tt_diff_len); spin_lock_bh(&bat_priv->tt.changes_list_lock); atomic_set(&bat_priv->tt.local_changes, 0); @@ -805,15 +808,17 @@ out: * If a TT local entry exists for this non-mesh client remove it. * * The caller must hold orig_node refcount. + * + * Return true if the new entry has been added, false otherwise */ -int batadv_tt_global_add(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - const unsigned char *tt_addr, uint16_t flags, - uint8_t ttvn) +static bool batadv_tt_global_add(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + const unsigned char *tt_addr, uint16_t flags, + uint8_t ttvn) { struct batadv_tt_global_entry *tt_global_entry; struct batadv_tt_local_entry *tt_local_entry; - int ret = 0; + bool ret = false; int hash_added; struct batadv_tt_common_entry *common; uint16_t local_flags; @@ -914,7 +919,7 @@ add_orig_entry: batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (via %pM)\n", common->addr, orig_node->orig); - ret = 1; + ret = true; out_remove: @@ -1491,11 +1496,9 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *tt_buff, - uint16_t tt_num_changes) + const void *tt_buff, + uint16_t tt_buff_len) { - uint16_t tt_buff_len = batadv_tt_len(tt_num_changes); - /* Replace the old buffer only if I received something in the * last OGM (the OGM could carry no changes) */ @@ -1622,7 +1625,7 @@ batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, tt_len -= tt_len % sizeof(struct batadv_tvlv_tt_change); } - tt_tot = tt_len / sizeof(struct batadv_tvlv_tt_change); + tt_tot = batadv_tt_entries(tt_len); tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len, GFP_ATOMIC); @@ -2032,8 +2035,8 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, _batadv_tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, ttvn); - batadv_tt_save_orig_buffer(bat_priv, orig_node, - (unsigned char *)tt_change, tt_num_changes); + batadv_tt_save_orig_buffer(bat_priv, orig_node, tt_change, + batadv_tt_len(tt_num_changes)); atomic_set(&orig_node->last_ttvn, ttvn); } @@ -2573,7 +2576,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; tvlv_value_len -= sizeof(*tt_data); - num_entries = tvlv_value_len / batadv_tt_len(1); + num_entries = batadv_tt_entries(tvlv_value_len); batadv_tt_update_orig(bat_priv, orig, (unsigned char *)(tt_data + 1), @@ -2608,7 +2611,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; tvlv_value_len -= sizeof(*tt_data); - num_entries = tvlv_value_len / batadv_tt_len(1); + num_entries = batadv_tt_entries(tvlv_value_len); switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) { case BATADV_TT_REQUEST: diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index b4b6dea4e2be..015d8b9e63b9 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -27,13 +27,6 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, const uint8_t *addr, const char *message, bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); -void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - const unsigned char *tt_buff, int tt_buff_len); -int batadv_tt_global_add(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - const unsigned char *addr, uint16_t flags, - uint8_t ttvn); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8fbd89d167cd..5cbb0d09a9b5 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -24,13 +24,6 @@ #include "bitarray.h" #include <linux/kernel.h> -/** - * Maximum overhead for the encapsulation for a payload packet - */ -#define BATADV_HEADER_LEN \ - (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \ - sizeof(struct batadv_bcast_packet))) - #ifdef CONFIG_BATMAN_ADV_DAT /* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed, @@ -60,7 +53,6 @@ struct batadv_hard_iface_bat_iv { * @if_num: identificator of the interface * @if_status: status of the interface for batman-adv * @net_dev: pointer to the net_device - * @frag_seqno: last fragment sequence number sent by this interface * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) * @hardif_obj: kobject of the per interface sysfs "mesh" directory * @refcount: number of contexts the object is used @@ -76,7 +68,6 @@ struct batadv_hard_iface { int16_t if_num; char if_status; struct net_device *net_dev; - atomic_t frag_seqno; uint8_t num_bcasts; struct kobject *hardif_obj; atomic_t refcount; @@ -88,6 +79,34 @@ struct batadv_hard_iface { }; /** + * struct batadv_frag_table_entry - head in the fragment buffer table + * @head: head of list with fragments + * @lock: lock to protect the list of fragments + * @timestamp: time (jiffie) of last received fragment + * @seqno: sequence number of the fragments in the list + * @size: accumulated size of packets in list + */ +struct batadv_frag_table_entry { + struct hlist_head head; + spinlock_t lock; /* protects head */ + unsigned long timestamp; + uint16_t seqno; + uint16_t size; +}; + +/** + * struct batadv_frag_list_entry - entry in a list of fragments + * @list: list node information + * @skb: fragment + * @no: fragment number in the set + */ +struct batadv_frag_list_entry { + struct hlist_node list; + struct sk_buff *skb; + uint8_t no; +}; + +/** * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh * @orig: originator ethernet address * @primary_addr: hosts primary interface address @@ -116,9 +135,6 @@ struct batadv_hard_iface { * last_bcast_seqno) * @last_bcast_seqno: last broadcast sequence number received by this host * @neigh_list: list of potential next hop neighbor towards this orig node - * @frag_list: fragmentation buffer list for fragment re-assembly - * @last_frag_packet: time when last fragmented packet from this node was - * received * @neigh_list_lock: lock protecting neigh_list, router and bonding_list * @hash_entry: hlist node for batadv_priv::orig_hash * @bat_priv: pointer to soft_iface this orig node belongs to @@ -133,6 +149,7 @@ struct batadv_hard_iface { * @out_coding_list: list of nodes that can hear this orig * @in_coding_list_lock: protects in_coding_list * @out_coding_list_lock: protects out_coding_list + * @fragments: array with heads for fragment chains */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; @@ -159,8 +176,6 @@ struct batadv_orig_node { DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); uint32_t last_bcast_seqno; struct hlist_head neigh_list; - struct list_head frag_list; - unsigned long last_frag_packet; /* neigh_list_lock protects: neigh_list, router & bonding_list */ spinlock_t neigh_list_lock; struct hlist_node hash_entry; @@ -181,6 +196,7 @@ struct batadv_orig_node { spinlock_t in_coding_list_lock; /* Protects in_coding_list */ spinlock_t out_coding_list_lock; /* Protects out_coding_list */ #endif + struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT]; }; /** @@ -277,6 +293,12 @@ struct batadv_bcast_duplist_entry { * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter + * @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter + * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter + * @BATADV_CNT_FRAG_RX: received fragment traffic packet counter + * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter + * @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter + * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter @@ -314,6 +336,12 @@ enum batadv_counters { BATADV_CNT_MGMT_TX_BYTES, BATADV_CNT_MGMT_RX, BATADV_CNT_MGMT_RX_BYTES, + BATADV_CNT_FRAG_TX, + BATADV_CNT_FRAG_TX_BYTES, + BATADV_CNT_FRAG_RX, + BATADV_CNT_FRAG_RX_BYTES, + BATADV_CNT_FRAG_FWD, + BATADV_CNT_FRAG_FWD_BYTES, BATADV_CNT_TT_REQUEST_TX, BATADV_CNT_TT_REQUEST_RX, BATADV_CNT_TT_RESPONSE_TX, @@ -511,6 +539,7 @@ struct batadv_priv_nc { * @aggregated_ogms: bool indicating whether OGM aggregation is enabled * @bonding: bool indicating whether traffic bonding is enabled * @fragmentation: bool indicating whether traffic fragmentation is enabled + * @frag_seqno: incremental counter to identify chains of egress fragments * @ap_isolation: bool indicating whether ap isolation is enabled * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is * enabled @@ -554,6 +583,7 @@ struct batadv_priv { atomic_t aggregated_ogms; atomic_t bonding; atomic_t fragmentation; + atomic_t frag_seqno; atomic_t ap_isolation; #ifdef CONFIG_BATMAN_ADV_BLA atomic_t bridge_loop_avoidance; @@ -874,18 +904,6 @@ struct batadv_forw_packet { }; /** - * struct batadv_frag_packet_list_entry - storage for fragment packet - * @list: list node for orig_node::frag_list - * @seqno: sequence number of the fragment - * @skb: fragment's skb buffer - */ -struct batadv_frag_packet_list_entry { - struct list_head list; - uint16_t seqno; - struct sk_buff *skb; -}; - -/** * struct batadv_algo_ops - mesh algorithm callbacks * @list: list node for the batadv_algo_list * @name: name of the algorithm diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c deleted file mode 100644 index 48b31d33ce6b..000000000000 --- a/net/batman-adv/unicast.c +++ /dev/null @@ -1,491 +0,0 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: - * - * Andreas Langer - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#include "main.h" -#include "unicast.h" -#include "send.h" -#include "soft-interface.h" -#include "gateway_client.h" -#include "originator.h" -#include "hash.h" -#include "translation-table.h" -#include "routing.h" -#include "hard-interface.h" - - -static struct sk_buff * -batadv_frag_merge_packet(struct list_head *head, - struct batadv_frag_packet_list_entry *tfp, - struct sk_buff *skb) -{ - struct batadv_unicast_frag_packet *up; - struct sk_buff *tmp_skb; - struct batadv_unicast_packet *unicast_packet; - int hdr_len = sizeof(*unicast_packet); - int uni_diff = sizeof(*up) - hdr_len; - uint8_t *packet_pos; - - up = (struct batadv_unicast_frag_packet *)skb->data; - /* set skb to the first part and tmp_skb to the second part */ - if (up->flags & BATADV_UNI_FRAG_HEAD) { - tmp_skb = tfp->skb; - } else { - tmp_skb = skb; - skb = tfp->skb; - } - - if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0) - goto err; - - skb_pull(tmp_skb, sizeof(*up)); - if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) - goto err; - - /* move free entry to end */ - tfp->skb = NULL; - tfp->seqno = 0; - list_move_tail(&tfp->list, head); - - memcpy(skb_put(skb, tmp_skb->len), tmp_skb->data, tmp_skb->len); - kfree_skb(tmp_skb); - - memmove(skb->data + uni_diff, skb->data, hdr_len); - packet_pos = skb_pull(skb, uni_diff); - unicast_packet = (struct batadv_unicast_packet *)packet_pos; - unicast_packet->header.packet_type = BATADV_UNICAST; - - return skb; - -err: - /* free buffered skb, skb will be freed later */ - kfree_skb(tfp->skb); - return NULL; -} - -static void batadv_frag_create_entry(struct list_head *head, - struct sk_buff *skb) -{ - struct batadv_frag_packet_list_entry *tfp; - struct batadv_unicast_frag_packet *up; - - up = (struct batadv_unicast_frag_packet *)skb->data; - - /* free and oldest packets stand at the end */ - tfp = list_entry((head)->prev, typeof(*tfp), list); - kfree_skb(tfp->skb); - - tfp->seqno = ntohs(up->seqno); - tfp->skb = skb; - list_move(&tfp->list, head); - return; -} - -static int batadv_frag_create_buffer(struct list_head *head) -{ - int i; - struct batadv_frag_packet_list_entry *tfp; - - for (i = 0; i < BATADV_FRAG_BUFFER_SIZE; i++) { - tfp = kmalloc(sizeof(*tfp), GFP_ATOMIC); - if (!tfp) { - batadv_frag_list_free(head); - return -ENOMEM; - } - tfp->skb = NULL; - tfp->seqno = 0; - INIT_LIST_HEAD(&tfp->list); - list_add(&tfp->list, head); - } - - return 0; -} - -static struct batadv_frag_packet_list_entry * -batadv_frag_search_packet(struct list_head *head, - const struct batadv_unicast_frag_packet *up) -{ - struct batadv_frag_packet_list_entry *tfp; - struct batadv_unicast_frag_packet *tmp_up = NULL; - bool is_head_tmp, is_head; - uint16_t search_seqno; - - if (up->flags & BATADV_UNI_FRAG_HEAD) - search_seqno = ntohs(up->seqno)+1; - else - search_seqno = ntohs(up->seqno)-1; - - is_head = up->flags & BATADV_UNI_FRAG_HEAD; - - list_for_each_entry(tfp, head, list) { - if (!tfp->skb) - continue; - - if (tfp->seqno == ntohs(up->seqno)) - goto mov_tail; - - tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data; - - if (tfp->seqno == search_seqno) { - is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD; - if (is_head_tmp != is_head) - return tfp; - else - goto mov_tail; - } - } - return NULL; - -mov_tail: - list_move_tail(&tfp->list, head); - return NULL; -} - -void batadv_frag_list_free(struct list_head *head) -{ - struct batadv_frag_packet_list_entry *pf, *tmp_pf; - - if (!list_empty(head)) { - list_for_each_entry_safe(pf, tmp_pf, head, list) { - kfree_skb(pf->skb); - list_del(&pf->list); - kfree(pf); - } - } - return; -} - -/* frag_reassemble_skb(): - * returns NET_RX_DROP if the operation failed - skb is left intact - * returns NET_RX_SUCCESS if the fragment was buffered (skb_new will be NULL) - * or the skb could be reassembled (skb_new will point to the new packet and - * skb was freed) - */ -int batadv_frag_reassemble_skb(struct sk_buff *skb, - struct batadv_priv *bat_priv, - struct sk_buff **new_skb) -{ - struct batadv_orig_node *orig_node; - struct batadv_frag_packet_list_entry *tmp_frag_entry; - int ret = NET_RX_DROP; - struct batadv_unicast_frag_packet *unicast_packet; - - unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; - *new_skb = NULL; - - orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->orig); - if (!orig_node) - goto out; - - orig_node->last_frag_packet = jiffies; - - if (list_empty(&orig_node->frag_list) && - batadv_frag_create_buffer(&orig_node->frag_list)) { - pr_debug("couldn't create frag buffer\n"); - goto out; - } - - tmp_frag_entry = batadv_frag_search_packet(&orig_node->frag_list, - unicast_packet); - - if (!tmp_frag_entry) { - batadv_frag_create_entry(&orig_node->frag_list, skb); - ret = NET_RX_SUCCESS; - goto out; - } - - *new_skb = batadv_frag_merge_packet(&orig_node->frag_list, - tmp_frag_entry, skb); - /* if not, merge failed */ - if (*new_skb) - ret = NET_RX_SUCCESS; - -out: - if (orig_node) - batadv_orig_node_free_ref(orig_node); - return ret; -} - -int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv, - struct batadv_hard_iface *hard_iface, - const uint8_t dstaddr[]) -{ - struct batadv_unicast_packet tmp_uc, *unicast_packet; - struct batadv_hard_iface *primary_if; - struct sk_buff *frag_skb; - struct batadv_unicast_frag_packet *frag1, *frag2; - int uc_hdr_len = sizeof(*unicast_packet); - int ucf_hdr_len = sizeof(*frag1); - int data_len = skb->len - uc_hdr_len; - int large_tail = 0, ret = NET_RX_DROP; - uint16_t seqno; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto dropped; - - frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); - if (!frag_skb) - goto dropped; - - skb->priority = TC_PRIO_CONTROL; - skb_reserve(frag_skb, ucf_hdr_len); - - unicast_packet = (struct batadv_unicast_packet *)skb->data; - memcpy(&tmp_uc, unicast_packet, uc_hdr_len); - skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len); - - if (batadv_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 || - batadv_skb_head_push(frag_skb, ucf_hdr_len) < 0) - goto drop_frag; - - frag1 = (struct batadv_unicast_frag_packet *)skb->data; - frag2 = (struct batadv_unicast_frag_packet *)frag_skb->data; - - memcpy(frag1, &tmp_uc, sizeof(tmp_uc)); - - frag1->header.ttl--; - frag1->header.version = BATADV_COMPAT_VERSION; - frag1->header.packet_type = BATADV_UNICAST_FRAG; - - memcpy(frag1->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - memcpy(frag2, frag1, sizeof(*frag2)); - - if (data_len & 1) - large_tail = BATADV_UNI_FRAG_LARGETAIL; - - frag1->flags = BATADV_UNI_FRAG_HEAD | large_tail; - frag2->flags = large_tail; - - seqno = atomic_add_return(2, &hard_iface->frag_seqno); - frag1->seqno = htons(seqno - 1); - frag2->seqno = htons(seqno); - - batadv_send_skb_packet(skb, hard_iface, dstaddr); - batadv_send_skb_packet(frag_skb, hard_iface, dstaddr); - ret = NET_RX_SUCCESS; - goto out; - -drop_frag: - kfree_skb(frag_skb); -dropped: - kfree_skb(skb); -out: - if (primary_if) - batadv_hardif_free_ref(primary_if); - return ret; -} - -/** - * batadv_unicast_push_and_fill_skb - extends the buffer and initializes the - * common fields for unicast packets - * @skb: packet - * @hdr_size: amount of bytes to push at the beginning of the skb - * @orig_node: the destination node - * - * Returns false if the buffer extension was not possible or true otherwise - */ -static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size, - struct batadv_orig_node *orig_node) -{ - struct batadv_unicast_packet *unicast_packet; - uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); - - if (batadv_skb_head_push(skb, hdr_size) < 0) - return false; - - unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.version = BATADV_COMPAT_VERSION; - /* batman packet type: unicast */ - unicast_packet->header.packet_type = BATADV_UNICAST; - /* set unicast ttl */ - unicast_packet->header.ttl = BATADV_TTL; - /* copy the destination for faster routing */ - memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); - /* set the destination tt version number */ - unicast_packet->ttvn = ttvn; - - return true; -} - -/** - * batadv_unicast_prepare_skb - encapsulate an skb with a unicast header - * @skb: the skb containing the payload to encapsulate - * @orig_node: the destination node - * - * Returns false if the payload could not be encapsulated or true otherwise. - * - * This call might reallocate skb data. - */ -static bool batadv_unicast_prepare_skb(struct sk_buff *skb, - struct batadv_orig_node *orig_node) -{ - size_t uni_size = sizeof(struct batadv_unicast_packet); - return batadv_unicast_push_and_fill_skb(skb, uni_size, orig_node); -} - -/** - * batadv_unicast_4addr_prepare_skb - encapsulate an skb with a unicast4addr - * header - * @bat_priv: the bat priv with all the soft interface information - * @skb: the skb containing the payload to encapsulate - * @orig_node: the destination node - * @packet_subtype: the batman 4addr packet subtype to use - * - * Returns false if the payload could not be encapsulated or true otherwise. - * - * This call might reallocate skb data. - */ -bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, - struct sk_buff *skb, - struct batadv_orig_node *orig, - int packet_subtype) -{ - struct batadv_hard_iface *primary_if; - struct batadv_unicast_4addr_packet *unicast_4addr_packet; - bool ret = false; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - /* pull the header space and fill the unicast_packet substructure. - * We can do that because the first member of the unicast_4addr_packet - * is of type struct unicast_packet - */ - if (!batadv_unicast_push_and_fill_skb(skb, - sizeof(*unicast_4addr_packet), - orig)) - goto out; - - unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - unicast_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR; - memcpy(unicast_4addr_packet->src, primary_if->net_dev->dev_addr, - ETH_ALEN); - unicast_4addr_packet->subtype = packet_subtype; - unicast_4addr_packet->reserved = 0; - - ret = true; -out: - if (primary_if) - batadv_hardif_free_ref(primary_if); - return ret; -} - -/** - * batadv_unicast_generic_send_skb - send an skb as unicast - * @bat_priv: the bat priv with all the soft interface information - * @skb: payload to send - * @packet_type: the batman unicast packet type to use - * @packet_subtype: the batman packet subtype. It is ignored if packet_type is - * not BATADV_UNICAT_4ADDR - * - * Returns 1 in case of error or 0 otherwise - */ -int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv, - struct sk_buff *skb, int packet_type, - int packet_subtype) -{ - struct ethhdr *ethhdr = (struct ethhdr *)skb->data; - struct batadv_unicast_packet *unicast_packet; - struct batadv_orig_node *orig_node; - struct batadv_neigh_node *neigh_node; - int data_len = skb->len; - int ret = NET_RX_DROP; - unsigned int dev_mtu, header_len; - - /* get routing information */ - if (is_multicast_ether_addr(ethhdr->h_dest)) { - orig_node = batadv_gw_get_selected_orig(bat_priv); - if (orig_node) - goto find_router; - } - - /* check for tt host - increases orig_node refcount. - * returns NULL in case of AP isolation - */ - orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source, - ethhdr->h_dest); - -find_router: - /* find_router(): - * - if orig_node is NULL it returns NULL - * - increases neigh_nodes refcount if found. - */ - neigh_node = batadv_find_router(bat_priv, orig_node, NULL); - - if (!neigh_node) - goto out; - - switch (packet_type) { - case BATADV_UNICAST: - if (!batadv_unicast_prepare_skb(skb, orig_node)) - goto out; - - header_len = sizeof(struct batadv_unicast_packet); - break; - case BATADV_UNICAST_4ADDR: - if (!batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node, - packet_subtype)) - goto out; - - header_len = sizeof(struct batadv_unicast_4addr_packet); - break; - default: - /* this function supports UNICAST and UNICAST_4ADDR only. It - * should never be invoked with any other packet type - */ - goto out; - } - - ethhdr = (struct ethhdr *)(skb->data + header_len); - unicast_packet = (struct batadv_unicast_packet *)skb->data; - - /* inform the destination node that we are still missing a correct route - * for this client. The destination will receive this packet and will - * try to reroute it because the ttvn contained in the header is less - * than the current one - */ - if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest)) - unicast_packet->ttvn = unicast_packet->ttvn - 1; - - dev_mtu = neigh_node->if_incoming->net_dev->mtu; - /* fragmentation mechanism only works for UNICAST (now) */ - if (packet_type == BATADV_UNICAST && - atomic_read(&bat_priv->fragmentation) && - data_len + sizeof(*unicast_packet) > dev_mtu) { - /* send frag skb decreases ttl */ - unicast_packet->header.ttl++; - ret = batadv_frag_send_skb(skb, bat_priv, - neigh_node->if_incoming, - neigh_node->addr); - goto out; - } - - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) - ret = 0; - -out: - if (neigh_node) - batadv_neigh_node_free_ref(neigh_node); - if (orig_node) - batadv_orig_node_free_ref(orig_node); - if (ret == NET_RX_DROP) - kfree_skb(skb); - return ret; -} diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h deleted file mode 100644 index 429cf8a4a31e..000000000000 --- a/net/batman-adv/unicast.h +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: - * - * Andreas Langer - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#ifndef _NET_BATMAN_ADV_UNICAST_H_ -#define _NET_BATMAN_ADV_UNICAST_H_ - -#include "packet.h" - -#define BATADV_FRAG_TIMEOUT 10000 /* purge frag list entries after time in ms */ -#define BATADV_FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ - -int batadv_frag_reassemble_skb(struct sk_buff *skb, - struct batadv_priv *bat_priv, - struct sk_buff **new_skb); -void batadv_frag_list_free(struct list_head *head); -int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv, - struct batadv_hard_iface *hard_iface, - const uint8_t dstaddr[]); -bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, - struct sk_buff *skb, - struct batadv_orig_node *orig_node, - int packet_subtype); -int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv, - struct sk_buff *skb, int packet_type, - int packet_subtype); - - -/** - * batadv_unicast_send_skb - send the skb encapsulated in a unicast packet - * @bat_priv: the bat priv with all the soft interface information - * @skb: the payload to send - */ -static inline int batadv_unicast_send_skb(struct batadv_priv *bat_priv, - struct sk_buff *skb) -{ - return batadv_unicast_generic_send_skb(bat_priv, skb, BATADV_UNICAST, - 0); -} - -/** - * batadv_unicast_send_skb - send the skb encapsulated in a unicast4addr packet - * @bat_priv: the bat priv with all the soft interface information - * @skb: the payload to send - * @packet_subtype: the batman 4addr packet subtype to use - */ -static inline int batadv_unicast_4addr_send_skb(struct batadv_priv *bat_priv, - struct sk_buff *skb, - int packet_subtype) -{ - return batadv_unicast_generic_send_skb(bat_priv, skb, - BATADV_UNICAST_4ADDR, - packet_subtype); -} - -static inline int batadv_frag_can_reassemble(const struct sk_buff *skb, int mtu) -{ - const struct batadv_unicast_frag_packet *unicast_packet; - int uneven_correction = 0; - unsigned int merged_size; - - unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; - - if (unicast_packet->flags & BATADV_UNI_FRAG_LARGETAIL) { - if (unicast_packet->flags & BATADV_UNI_FRAG_HEAD) - uneven_correction = 1; - else - uneven_correction = -1; - } - - merged_size = (skb->len - sizeof(*unicast_packet)) * 2; - merged_size += sizeof(struct batadv_unicast_packet) + uneven_correction; - - return merged_size <= mtu; -} - -#endif /* _NET_BATMAN_ADV_UNICAST_H_ */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f87736270eaa..878f008afefa 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -619,7 +619,7 @@ bad: /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables, which doesn't support NAT, so things are fairly simple. */ -static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, +static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -669,7 +669,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -691,7 +692,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); + return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -727,7 +728,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -765,7 +767,8 @@ static int br_nf_forward_finish(struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -818,7 +821,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -878,7 +882,8 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) #endif /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -923,7 +928,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, +static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index a9aff9c7d027..68f8128147be 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -1,6 +1,9 @@ # # Bridge netfilter configuration # +# +config NF_TABLES_BRIDGE + tristate "Ethernet Bridge nf_tables support" menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 0718699540b0..ea7629f58b3d 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -2,6 +2,8 @@ # Makefile for the netfilter modules for Link Layer filtering on a bridge. # +obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o # tables diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 94b2b700cff8..bb2da7b706e7 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -60,17 +60,21 @@ static const struct ebt_table frame_filter = }; static unsigned int -ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_filter); } static unsigned int -ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 322555acdd40..bd238f1f105b 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -60,17 +60,21 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_nat); } static unsigned int -ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c new file mode 100644 index 000000000000..e8cb016fa34d --- /dev/null +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netfilter_bridge.h> +#include <net/netfilter/nf_tables.h> + +static struct nft_af_info nft_af_bridge __read_mostly = { + .family = NFPROTO_BRIDGE, + .nhooks = NF_BR_NUMHOOKS, + .owner = THIS_MODULE, +}; + +static int nf_tables_bridge_init_net(struct net *net) +{ + net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.bridge == NULL) + return -ENOMEM; + + memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge)); + + if (nft_register_afinfo(net, net->nft.bridge) < 0) + goto err; + + return 0; +err: + kfree(net->nft.bridge); + return -ENOMEM; +} + +static void nf_tables_bridge_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.bridge); + kfree(net->nft.bridge); +} + +static struct pernet_operations nf_tables_bridge_net_ops = { + .init = nf_tables_bridge_init_net, + .exit = nf_tables_bridge_exit_net, +}; + +static int __init nf_tables_bridge_init(void) +{ + return register_pernet_subsys(&nf_tables_bridge_net_ops); +} + +static void __exit nf_tables_bridge_exit(void) +{ + return unregister_pernet_subsys(&nf_tables_bridge_net_ops); +} + +module_init(nf_tables_bridge_init); +module_exit(nf_tables_bridge_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 2a7efe388344..e83015cecfa7 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) } -static unsigned int dnrmg_hook(unsigned int hook, +static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 41e1c3ea8b51..56a964a553d2 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -336,12 +336,9 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s err = 0; out: - if (sk) { - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put((struct inet_timewait_sock *)sk); - else - sock_put(sk); - } + if (sk) + sock_gen_put(sk); + out_nosk: return err; } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1657e39b291f..40d56073cd19 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,6 +36,27 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. +config NF_TABLES_IPV4 + depends on NF_TABLES + tristate "IPv4 nf_tables support" + +config NFT_REJECT_IPV4 + depends on NF_TABLES_IPV4 + tristate "nf_tables IPv4 reject support" + +config NFT_CHAIN_ROUTE_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables route chain support" + +config NFT_CHAIN_NAT_IPV4 + depends on NF_TABLES_IPV4 + depends on NF_NAT_IPV4 && NFT_NAT + tristate "IPv4 nf_tables nat chain support" + +config NF_TABLES_ARP + depends on NF_TABLES + tristate "ARP nf_tables support" + config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3622b248b6dd..19df72b7ba88 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -27,6 +27,12 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o +obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o +obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o + # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index a865f6f94013..802ddecb30b8 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -27,13 +27,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int -arptable_filter_hook(unsigned int hook, struct sk_buff *skb, +arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); + return arpt_do_table(skb, ops->hooknum, in, out, + net->ipv4.arptable_filter); } static struct nf_hook_ops *arpfilter_ops __read_mostly; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 0b732efd32e2..a2e2b61cd7da 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -483,7 +483,7 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(unsigned int hook, +arp_mangle(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index b6346bf2fde3..01cffeaa0085 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -297,7 +297,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv4_synproxy_hook(unsigned int hooknum, +static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 50af5b45c050..e08a74a243a8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -33,20 +33,21 @@ static const struct xt_table packet_filter = { }; static unsigned int -iptable_filter_hook(unsigned int hook, struct sk_buff *skb, +iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 0d8cd82e0fad..6a5079c34bb3 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -79,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -iptable_mangle_hook(unsigned int hook, +iptable_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ipt_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ipt_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, hook, in, out, + return ipt_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv4.iptable_mangle); } diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 683bfaffed65..ee2886126e3d 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv4_fn(unsigned int hooknum, +nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum, enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; /* maniptype == SRC for postrouting. */ - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); /* We never see fragments: conntrack defrags on pre-routing * and local-out, and nf_nat_out protects post-routing. @@ -108,7 +108,7 @@ nf_nat_ipv4_fn(unsigned int hooknum, case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - hooknum)) + ops->hooknum)) return NF_DROP; else return NF_ACCEPT; @@ -121,14 +121,14 @@ nf_nat_ipv4_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -137,11 +137,11 @@ nf_nat_ipv4_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -149,7 +149,7 @@ oif_changed: } static unsigned int -nf_nat_ipv4_in(unsigned int hooknum, +nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -158,7 +158,7 @@ nf_nat_ipv4_in(unsigned int hooknum, unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -167,7 +167,7 @@ nf_nat_ipv4_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv4_out(unsigned int hooknum, +nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -185,7 +185,7 @@ nf_nat_ipv4_out(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -207,7 +207,7 @@ nf_nat_ipv4_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv4_local_fn(unsigned int hooknum, +nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -223,7 +223,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 1f82aea11df6..b2f7e8f98316 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -20,20 +20,20 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -iptable_raw_hook(unsigned int hook, struct sk_buff *skb, +iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); + return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index f867a8d38bf7..c86647ed2078 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -37,21 +37,22 @@ static const struct xt_table security_table = { }; static unsigned int -iptable_security_hook(unsigned int hook, struct sk_buff *skb, +iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* Somebody is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 86f5b34a4ed1..ecd8bec411c9 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_helper(unsigned int hooknum, +static unsigned int ipv4_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -121,7 +121,7 @@ static unsigned int ipv4_helper(unsigned int hooknum, ct, ctinfo); } -static unsigned int ipv4_confirm(unsigned int hooknum, +static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -147,16 +147,16 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(unsigned int hooknum, +static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb); } -static unsigned int ipv4_conntrack_local(unsigned int hooknum, +static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -166,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 742815518b0f..12e13bd82b5b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -60,7 +60,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, return IP_DEFRAG_CONNTRACK_OUT + zone; } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, +static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -83,7 +83,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { - enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); + enum ip_defrag_users user = + nf_ct_defrag_user(ops->hooknum, skb); + if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c new file mode 100644 index 000000000000..3e67ef1c676f --- /dev/null +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/netfilter_arp.h> +#include <net/netfilter/nf_tables.h> + +static struct nft_af_info nft_af_arp __read_mostly = { + .family = NFPROTO_ARP, + .nhooks = NF_ARP_NUMHOOKS, + .owner = THIS_MODULE, +}; + +static int nf_tables_arp_init_net(struct net *net) +{ + net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.arp== NULL) + return -ENOMEM; + + memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp)); + + if (nft_register_afinfo(net, net->nft.arp) < 0) + goto err; + + return 0; +err: + kfree(net->nft.arp); + return -ENOMEM; +} + +static void nf_tables_arp_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.arp); + kfree(net->nft.arp); +} + +static struct pernet_operations nf_tables_arp_net_ops = { + .init = nf_tables_arp_init_net, + .exit = nf_tables_arp_exit_net, +}; + +static unsigned int +nft_do_chain_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain_pktinfo(&pkt, ops); +} + +static struct nf_chain_type filter_arp = { + .family = NFPROTO_ARP, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_ARP_IN) | + (1 << NF_ARP_OUT) | + (1 << NF_ARP_FORWARD), + .fn = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + [NF_ARP_FORWARD] = nft_do_chain_arp, + }, +}; + +static int __init nf_tables_arp_init(void) +{ + int ret; + + nft_register_chain_type(&filter_arp); + ret = register_pernet_subsys(&nf_tables_arp_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_arp); + + return ret; +} + +static void __exit nf_tables_arp_exit(void) +{ + unregister_pernet_subsys(&nf_tables_arp_net_ops); + nft_unregister_chain_type(&filter_arp); +} + +module_init(nf_tables_arp_init); +module_exit(nf_tables_arp_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */ diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c new file mode 100644 index 000000000000..8f7536be1322 --- /dev/null +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/nf_tables.h> +#include <net/net_namespace.h> +#include <net/ip.h> +#include <net/net_namespace.h> +#include <net/netfilter/nf_tables_ipv4.h> + +static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + if (unlikely(skb->len < sizeof(struct iphdr) || + ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { + if (net_ratelimit()) + pr_info("nf_tables_ipv4: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + return nft_do_chain_pktinfo(&pkt, ops); +} + +static struct nft_af_info nft_af_ipv4 __read_mostly = { + .family = NFPROTO_IPV4, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .hooks = { + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + }, +}; + +static int nf_tables_ipv4_init_net(struct net *net) +{ + net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv4 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4)); + + if (nft_register_afinfo(net, net->nft.ipv4) < 0) + goto err; + + return 0; +err: + kfree(net->nft.ipv4); + return -ENOMEM; +} + +static void nf_tables_ipv4_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv4); + kfree(net->nft.ipv4); +} + +static struct pernet_operations nf_tables_ipv4_net_ops = { + .init = nf_tables_ipv4_init_net, + .exit = nf_tables_ipv4_exit_net, +}; + +static unsigned int +nft_do_chain_ipv4(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + return nft_do_chain_pktinfo(&pkt, ops); +} + +static struct nf_chain_type filter_ipv4 = { + .family = NFPROTO_IPV4, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .fn = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, + }, +}; + +static int __init nf_tables_ipv4_init(void) +{ + nft_register_chain_type(&filter_ipv4); + return register_pernet_subsys(&nf_tables_ipv4_net_ops); +} + +static void __exit nf_tables_ipv4_exit(void) +{ + unregister_pernet_subsys(&nf_tables_ipv4_net_ops); + nft_unregister_chain_type(&filter_ipv4); +} + +module_init(nf_tables_ipv4_init); +module_exit(nf_tables_ipv4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(AF_INET); diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c new file mode 100644 index 000000000000..cf2c792cd971 --- /dev/null +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/ip.h> + +/* + * NAT chains + */ + +static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + struct nft_pktinfo pkt; + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + ops->hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + ret = nft_do_chain_pktinfo(&pkt, ops); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + __be32 daddr = ip_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ip_hdr(skb)->daddr != daddr) { + skb_dst_drop(skb); + } + return ret; +} + +static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip || + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all) + return nf_xfrm_me_harder(skb, AF_INET) == 0 ? + ret : NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_chain_type nft_chain_nat_ipv4 = { + .family = NFPROTO_IPV4, + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .fn = { + [NF_INET_PRE_ROUTING] = nf_nat_prerouting, + [NF_INET_POST_ROUTING] = nf_nat_postrouting, + [NF_INET_LOCAL_OUT] = nf_nat_output, + [NF_INET_LOCAL_IN] = nf_nat_fn, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_nat_init(void) +{ + int err; + + err = nft_register_chain_type(&nft_chain_nat_ipv4); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_chain_nat_exit(void) +{ + nft_unregister_chain_type(&nft_chain_nat_ipv4); +} + +module_init(nft_chain_nat_init); +module_exit(nft_chain_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c new file mode 100644 index 000000000000..4e6bf9a3d7aa --- /dev/null +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/route.h> +#include <net/ip.h> + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct nft_pktinfo pkt; + u32 mark; + __be32 saddr, daddr; + u_int8_t tos; + const struct iphdr *iph; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain_pktinfo(&pkt, ops); + if (ret != NF_DROP && ret != NF_QUEUE) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } + return ret; +} + +static struct nf_chain_type nft_chain_route_ipv4 = { + .family = NFPROTO_IPV4, + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .fn = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_route_init(void) +{ + return nft_register_chain_type(&nft_chain_route_ipv4); +} + +static void __exit nft_chain_route_exit(void) +{ + nft_unregister_chain_type(&nft_chain_route_ipv4); +} + +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "route"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 000000000000..fff5ba1a33b7 --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/icmp.h> + +struct nft_reject { + enum nft_reject_types type:8; + u8 icmp_code; +}; + +static void nft_reject_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0); + break; + case NFT_REJECT_TCP_RST: + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { + [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, + [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 }, +}; + +static int nft_reject_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type)) + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + } + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_reject_type; +static const struct nft_expr_ops nft_reject_ops = { + .type = &nft_reject_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_type __read_mostly = { + .name = "reject", + .ops = &nft_reject_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_module_init(void) +{ + return nft_register_expr(&nft_reject_type); +} + +static void __exit nft_reject_module_exit(void) +{ + nft_unregister_expr(&nft_reject_type); +} + +module_init(nft_reject_module_init); +module_exit(nft_reject_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("reject"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6011615e810d..d2d325382b13 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -295,7 +295,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", dst_entries_get_slow(&ipv4_dst_ops), - st->in_hit, + 0, /* st->in_hit */ st->in_slow_tot, st->in_slow_mc, st->in_no_route, @@ -303,16 +303,16 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) st->in_martian_dst, st->in_martian_src, - st->out_hit, + 0, /* st->out_hit */ st->out_slow_tot, st->out_slow_mc, - st->gc_total, - st->gc_ignored, - st->gc_goal_miss, - st->gc_dst_overflow, - st->in_hlist_search, - st->out_hlist_search + 0, /* st->gc_total */ + 0, /* st->gc_ignored */ + 0, /* st->gc_goal_miss */ + 0, /* st->gc_dst_overflow */ + 0, /* st->in_hlist_search */ + 0 /* st->out_hlist_search */ ); return 0; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a7f842b29b67..7702f9e90a04 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,19 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_TABLES_IPV6 + depends on NF_TABLES + tristate "IPv6 nf_tables support" + +config NFT_CHAIN_ROUTE_IPV6 + depends on NF_TABLES_IPV6 + tristate "IPv6 nf_tables route chain support" + +config NFT_CHAIN_NAT_IPV6 + depends on NF_TABLES_IPV6 + depends on NF_NAT_IPV6 && NFT_NAT + tristate "IPv6 nf_tables nat chain support" + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b53738f798c..d1b4928f34f7 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,6 +23,11 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +# nf_tables +obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o + # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 2748b042da72..bf9f612c1bc2 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -312,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(unsigned int hooknum, +static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 29b44b14c5ea..ca7f6c128086 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,13 +32,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, +ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c705907ae6ab..307bbb782d14 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, +ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, hook, in, out, + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv6.ip6table_mangle); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 9b076d2d3a7b..84c7f33d0cf8 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv6_fn(unsigned int hooknum, +nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); __be16 frag_off; int hdrlen; u8 nexthdr; @@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - hooknum, hdrlen)) + ops->hooknum, + hdrlen)) return NF_DROP; else return NF_ACCEPT; @@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -152,7 +153,7 @@ oif_changed: } static unsigned int -nf_nat_ipv6_in(unsigned int hooknum, +nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_out(unsigned int hooknum, +nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_local_fn(unsigned int hooknum, +nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 9a626d86720f..5274740acecc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,13 +19,14 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, +ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ce88d1d7e525..ab3b0219ecfa 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,14 +36,15 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(unsigned int hook, struct sk_buff *skb, +ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 54b75ead5a69..486545eb42ce 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(unsigned int hooknum, +static unsigned int ipv6_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(unsigned int hooknum, +static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -219,16 +219,17 @@ static unsigned int __ipv6_conntrack_in(struct net *net, return nf_conntrack_in(net, PF_INET6, hooknum, skb); } -static unsigned int ipv6_conntrack_in(unsigned int hooknum, +static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out, + okfn); } -static unsigned int ipv6_conntrack_local(unsigned int hooknum, +static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -239,7 +240,8 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out, + okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index aacd121fe8c5..ec483aa3f60f 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, } -static unsigned int ipv6_defrag(unsigned int hooknum, +static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == skb) return NF_ACCEPT; - nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, + nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in, (struct net_device *)out, okfn); return NF_STOLEN; diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c new file mode 100644 index 000000000000..d77db8a13505 --- /dev/null +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ipv6.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> + +static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + if (unlikely(skb->len < sizeof(struct ipv6hdr))) { + if (net_ratelimit()) + pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain_pktinfo(&pkt, ops); +} + +static struct nft_af_info nft_af_ipv6 __read_mostly = { + .family = NFPROTO_IPV6, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .hooks = { + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + }, +}; + +static int nf_tables_ipv6_init_net(struct net *net) +{ + net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv6 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6)); + + if (nft_register_afinfo(net, net->nft.ipv6) < 0) + goto err; + + return 0; +err: + kfree(net->nft.ipv6); + return -ENOMEM; +} + +static void nf_tables_ipv6_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv6); + kfree(net->nft.ipv6); +} + +static struct pernet_operations nf_tables_ipv6_net_ops = { + .init = nf_tables_ipv6_init_net, + .exit = nf_tables_ipv6_exit_net, +}; + +static unsigned int +nft_do_chain_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain_pktinfo(&pkt, ops); +} + +static struct nf_chain_type filter_ipv6 = { + .family = NFPROTO_IPV6, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .fn = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, + }, +}; + +static int __init nf_tables_ipv6_init(void) +{ + nft_register_chain_type(&filter_ipv6); + return register_pernet_subsys(&nf_tables_ipv6_net_ops); +} + +static void __exit nf_tables_ipv6_exit(void) +{ + unregister_pernet_subsys(&nf_tables_ipv6_net_ops); + nft_unregister_chain_type(&filter_ipv6); +} + +module_init(nf_tables_ipv6_init); +module_exit(nf_tables_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(AF_INET6); diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c new file mode 100644 index 000000000000..e86dcd70dc76 --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/ipv6.h> + +/* + * IPv6 NAT chains + */ + +static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + struct nft_pktinfo pkt; + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + ops->hooknum, + hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + + ret = nft_do_chain_pktinfo(&pkt, ops); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET6) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + if (ip6_route_me_harder(skb)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET6)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_chain_type nft_chain_nat_ipv6 = { + .family = NFPROTO_IPV6, + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .fn = { + [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting, + [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting, + [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output, + [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_nat_ipv6_init(void) +{ + int err; + + err = nft_register_chain_type(&nft_chain_nat_ipv6); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_chain_nat_ipv6_exit(void) +{ + nft_unregister_chain_type(&nft_chain_nat_ipv6); +} + +module_init(nft_chain_nat_ipv6_init); +module_exit(nft_chain_nat_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c new file mode 100644 index 000000000000..3fe40f0456ad --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/route.h> + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct nft_pktinfo pkt; + struct in6_addr saddr, daddr; + u_int8_t hop_limit; + u32 mark, flowlabel; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + /* save source/dest address, mark, hoplimit, flowlabel, priority */ + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either */ + flowlabel = *((u32 *)ipv6_hdr(skb)); + + ret = nft_do_chain_pktinfo(&pkt, ops); + if (ret != NF_DROP && ret != NF_QUEUE && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) + return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + + return ret; +} + +static struct nf_chain_type nft_chain_route_ipv6 = { + .family = NFPROTO_IPV6, + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .fn = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_route_init(void) +{ + return nft_register_chain_type(&nft_chain_route_ipv6); +} + +static void __exit nft_chain_route_exit(void) +{ + nft_unregister_chain_type(&nft_chain_route_ipv6); +} + +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6e839b6dff2b..48acec17e27a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -413,6 +413,58 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK +config NF_TABLES + depends on NETFILTER_NETLINK + tristate "Netfilter nf_tables support" + +config NFT_EXTHDR + depends on NF_TABLES + tristate "Netfilter nf_tables IPv6 exthdr module" + +config NFT_META + depends on NF_TABLES + tristate "Netfilter nf_tables meta module" + +config NFT_CT + depends on NF_TABLES + depends on NF_CONNTRACK + tristate "Netfilter nf_tables conntrack module" + +config NFT_RBTREE + depends on NF_TABLES + tristate "Netfilter nf_tables rbtree set module" + +config NFT_HASH + depends on NF_TABLES + tristate "Netfilter nf_tables hash set module" + +config NFT_COUNTER + depends on NF_TABLES + tristate "Netfilter nf_tables counter module" + +config NFT_LOG + depends on NF_TABLES + tristate "Netfilter nf_tables log module" + +config NFT_LIMIT + depends on NF_TABLES + tristate "Netfilter nf_tables limit module" + +config NFT_NAT + depends on NF_TABLES + depends on NF_CONNTRACK + depends on NF_NAT + tristate "Netfilter nf_tables nat module" + +config NFT_COMPAT + depends on NF_TABLES + depends on NETFILTER_XTABLES + tristate "Netfilter x_tables over nf_tables module" + help + This is required if you intend to use any of existing + x_tables match/target extensions over the nf_tables + framework. + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c3a0a12907f6..394483b2c193 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -64,6 +64,24 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +# nf_tables +nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o + +obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NFT_COMPAT) += nft_compat.o +obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o +obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_CT) += nft_ct.o +obj-$(CONFIG_NFT_LIMIT) += nft_limit.o +obj-$(CONFIG_NFT_NAT) += nft_nat.o +#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o +obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_COUNTER) += nft_counter.o +obj-$(CONFIG_NFT_LOG) += nft_log.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 593b16ea45e0..1fbab0cdd302 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 74fd00c27210..34fda62f40f6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } /* @@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } /* @@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } #endif @@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } /* @@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, * Copy info from first fragment, to the rest of them. */ static unsigned int -ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } /* @@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } #endif @@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, hooknum); + return ip_vs_in_icmp(skb, &r, ops->hooknum); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); + return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); } #endif diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6f0f4f7f68a5..63a815402211 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -432,6 +432,26 @@ nf_nat_setup_info(struct nf_conn *ct, } EXPORT_SYMBOL(nf_nat_setup_info); +unsigned int +nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + * Use reply in case it's already been mangled (eg local packet). + */ + union nf_inet_addr ip = + (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); + struct nf_nat_range range = { + .flags = NF_NAT_RANGE_MAP_IPS, + .min_addr = ip, + .max_addr = ip, + }; + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} +EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); + /* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c new file mode 100644 index 000000000000..dcddc49c0e08 --- /dev/null +++ b/net/netfilter/nf_tables_api.c @@ -0,0 +1,3275 @@ +/* + * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +static LIST_HEAD(nf_tables_expressions); + +/** + * nft_register_afinfo - register nf_tables address family info + * + * @afi: address family info to register + * + * Register the address family for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_afinfo(struct net *net, struct nft_af_info *afi) +{ + INIT_LIST_HEAD(&afi->tables); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&afi->list, &net->nft.af_info); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_afinfo); + +/** + * nft_unregister_afinfo - unregister nf_tables address family info + * + * @afi: address family info to unregister + * + * Unregister the address family for use with nf_tables. + */ +void nft_unregister_afinfo(struct nft_af_info *afi) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&afi->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_afinfo); + +static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family) +{ + struct nft_af_info *afi; + + list_for_each_entry(afi, &net->nft.af_info, list) { + if (afi->family == family) + return afi; + } + return NULL; +} + +static struct nft_af_info * +nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) +{ + struct nft_af_info *afi; + + afi = nft_afinfo_lookup(net, family); + if (afi != NULL) + return afi; +#ifdef CONFIG_MODULES + if (autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-afinfo-%u", family); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + afi = nft_afinfo_lookup(net, family); + if (afi != NULL) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-EAFNOSUPPORT); +} + +/* + * Tables + */ + +static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, + const struct nlattr *nla) +{ + struct nft_table *table; + + list_for_each_entry(table, &afi->tables, list) { + if (!nla_strcmp(nla, table->name)) + return table; + } + return NULL; +} + +static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, + const struct nlattr *nla) +{ + struct nft_table *table; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + table = nft_table_lookup(afi, nla); + if (table != NULL) + return table; + + return ERR_PTR(-ENOENT); +} + +static inline u64 nf_tables_alloc_handle(struct nft_table *table) +{ + return ++table->hgenerator; +} + +static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; + +static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) +{ + int i; + + for (i=0; i<NFT_CHAIN_T_MAX; i++) { + if (chain_type[family][i] != NULL && + !nla_strcmp(nla, chain_type[family][i]->name)) + return i; + } + return -1; +} + +static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) +{ + int type; + + type = __nf_tables_chain_type_lookup(afi->family, nla); +#ifdef CONFIG_MODULES + if (type < 0 && autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-chain-%u-%*.s", afi->family, + nla_len(nla)-1, (const char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + type = __nf_tables_chain_type_lookup(afi->family, nla); + } +#endif + return type; +} + +static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { + [NFTA_TABLE_NAME] = { .type = NLA_STRING }, + [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, +}; + +static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags))) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_table_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + int event, int family) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + bool report; + int err; + + report = nlh ? nlmsg_report(nlh) : false; + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_table_info(skb, portid, seq, event, 0, + family, table); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_tables(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &net->nft.af_info, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_table_info(skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWTABLE, + NLM_F_MULTI, + afi->family, table) < 0) + goto done; +cont: + idx++; + } + } +done: + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_tables, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); + if (IS_ERR(table)) + return PTR_ERR(table); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, + family, table); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_table_enable(struct nft_table *table) +{ + struct nft_chain *chain; + int err, i = 0; + + list_for_each_entry(chain, &table->chains, list) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) + goto err; + + i++; + } + return 0; +err: + list_for_each_entry(chain, &table->chains, list) { + if (i-- <= 0) + break; + + nf_unregister_hook(&nft_base_chain(chain)->ops); + } + return err; +} + +static int nf_tables_table_disable(struct nft_table *table) +{ + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + return 0; +} + +static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct nft_af_info *afi, struct nft_table *table) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + int family = nfmsg->nfgen_family, ret = 0; + + if (nla[NFTA_TABLE_FLAGS]) { + __be32 flags; + + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; + + if ((flags & NFT_TABLE_F_DORMANT) && + !(table->flags & NFT_TABLE_F_DORMANT)) { + ret = nf_tables_table_disable(table); + if (ret >= 0) + table->flags |= NFT_TABLE_F_DORMANT; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + table->flags & NFT_TABLE_F_DORMANT) { + ret = nf_tables_table_enable(table); + if (ret >= 0) + table->flags &= ~NFT_TABLE_F_DORMANT; + } + if (ret < 0) + goto err; + } + + nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); +err: + return ret; +} + +static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr *name; + struct nft_af_info *afi; + struct nft_table *table; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + name = nla[NFTA_TABLE_NAME]; + table = nf_tables_table_lookup(afi, name); + if (IS_ERR(table)) { + if (PTR_ERR(table) != -ENOENT) + return PTR_ERR(table); + table = NULL; + } + + if (table != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); + } + + table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); + if (table == NULL) + return -ENOMEM; + + nla_strlcpy(table->name, name, nla_len(name)); + INIT_LIST_HEAD(&table->chains); + INIT_LIST_HEAD(&table->sets); + + if (nla[NFTA_TABLE_FLAGS]) { + __be32 flags; + + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) { + kfree(table); + return -EINVAL; + } + + table->flags |= flags; + } + + list_add_tail(&table->list, &afi->tables); + nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); + return 0; +} + +static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + struct nft_af_info *afi; + struct nft_table *table; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (table->use) + return -EBUSY; + + list_del(&table->list); + nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); + kfree(table); + return 0; +} + +int nft_register_chain_type(struct nf_chain_type *ctype) +{ + int err = 0; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (chain_type[ctype->family][ctype->type] != NULL) { + err = -EBUSY; + goto out; + } + + if (!try_module_get(ctype->me)) + goto out; + + chain_type[ctype->family][ctype->type] = ctype; +out: + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return err; +} +EXPORT_SYMBOL_GPL(nft_register_chain_type); + +void nft_unregister_chain_type(struct nf_chain_type *ctype) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + chain_type[ctype->family][ctype->type] = NULL; + module_put(ctype->me); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_chain_type); + +/* + * Chains + */ + +static struct nft_chain * +nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) +{ + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) { + if (chain->handle == handle) + return chain; + } + + return ERR_PTR(-ENOENT); +} + +static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, + const struct nlattr *nla) +{ + struct nft_chain *chain; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(chain, &table->chains, list) { + if (!nla_strcmp(nla, chain->name)) + return chain; + } + + return ERR_PTR(-ENOENT); +} + +static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { + [NFTA_CHAIN_TABLE] = { .type = NLA_STRING }, + [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 }, + [NFTA_CHAIN_NAME] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, + [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, + [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, + [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { + [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, + [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, +}; + +static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) +{ + struct nft_stats *cpu_stats, total; + struct nlattr *nest; + int cpu; + + memset(&total, 0, sizeof(total)); + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(stats, cpu); + total.pkts += cpu_stats->pkts; + total.bytes += cpu_stats->bytes; + } + nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); + if (nest == NULL) + goto nla_put_failure; + + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) || + nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) + goto nla_put_failure; + + if (chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = nft_base_chain(chain); + const struct nf_hook_ops *ops = &basechain->ops; + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + if (nest == NULL) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) + goto nla_put_failure; + nla_nest_end(skb, nest); + + if (nla_put_be32(skb, NFTA_CHAIN_POLICY, + htonl(basechain->policy))) + goto nla_put_failure; + + if (nla_put_string(skb, NFTA_CHAIN_TYPE, + chain_type[ops->pf][nft_base_chain(chain)->type]->name)) + goto nla_put_failure; + + if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) + goto nla_put_failure; + } + + if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_chain_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + int event, int family) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + bool report; + int err; + + report = nlh ? nlmsg_report(nlh) : false; + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family, + table, chain); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_chains(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &net->nft.af_info, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWCHAIN, + NLM_F_MULTI, + afi->family, table, chain) < 0) + goto done; +cont: + idx++; + } + } + } +done: + cb->args[0] = idx; + return skb->len; +} + + +static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_chains, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, + family, table, chain); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int +nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr) +{ + switch (ntohl(nla_get_be32(attr))) { + case NF_DROP: + chain->policy = NF_DROP; + break; + case NF_ACCEPT: + chain->policy = NF_ACCEPT; + break; + default: + return -EINVAL; + } + return 0; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int +nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) +{ + struct nlattr *tb[NFTA_COUNTER_MAX+1]; + struct nft_stats __percpu *newstats; + struct nft_stats *stats; + int err; + + err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); + if (err < 0) + return err; + + if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) + return -EINVAL; + + newstats = alloc_percpu(struct nft_stats); + if (newstats == NULL) + return -ENOMEM; + + /* Restore old counters on this cpu, no problem. Per-cpu statistics + * are not exposed to userspace. + */ + stats = this_cpu_ptr(newstats); + stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + + if (chain->stats) { + /* nfnl_lock is held, add some nfnl function for this, later */ + struct nft_stats __percpu *oldstats = + rcu_dereference_protected(chain->stats, 1); + + rcu_assign_pointer(chain->stats, newstats); + synchronize_rcu(); + free_percpu(oldstats); + } else + rcu_assign_pointer(chain->stats, newstats); + + return 0; +} + +static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr * uninitialized_var(name); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_base_chain *basechain = NULL; + struct nlattr *ha[NFTA_HOOK_MAX + 1]; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + u64 handle = 0; + int err; + bool create; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (table->use == UINT_MAX) + return -EOVERFLOW; + + chain = NULL; + name = nla[NFTA_CHAIN_NAME]; + + if (nla[NFTA_CHAIN_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); + chain = nf_tables_chain_lookup_byhandle(table, handle); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } else { + chain = nf_tables_chain_lookup(table, name); + if (IS_ERR(chain)) { + if (PTR_ERR(chain) != -ENOENT) + return PTR_ERR(chain); + chain = NULL; + } + } + + if (chain != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (nla[NFTA_CHAIN_HANDLE] && name && + !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) + return -EEXIST; + + if (nla[NFTA_CHAIN_POLICY]) { + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EOPNOTSUPP; + + err = nf_tables_chain_policy(nft_base_chain(chain), + nla[NFTA_CHAIN_POLICY]); + if (err < 0) + return err; + } + + if (nla[NFTA_CHAIN_COUNTERS]) { + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EOPNOTSUPP; + + err = nf_tables_counters(nft_base_chain(chain), + nla[NFTA_CHAIN_COUNTERS]); + if (err < 0) + return err; + } + + if (nla[NFTA_CHAIN_HANDLE] && name) + nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + + goto notify; + } + + if (nla[NFTA_CHAIN_HOOK]) { + struct nf_hook_ops *ops; + nf_hookfn *hookfn; + u32 hooknum; + int type = NFT_CHAIN_T_DEFAULT; + + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(afi, + nla[NFTA_CHAIN_TYPE], + create); + if (type < 0) + return -ENOENT; + } + + err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], + nft_hook_policy); + if (err < 0) + return err; + if (ha[NFTA_HOOK_HOOKNUM] == NULL || + ha[NFTA_HOOK_PRIORITY] == NULL) + return -EINVAL; + + hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + if (hooknum >= afi->nhooks) + return -EINVAL; + + hookfn = chain_type[family][type]->fn[hooknum]; + if (hookfn == NULL) + return -EOPNOTSUPP; + + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); + if (basechain == NULL) + return -ENOMEM; + + basechain->type = type; + chain = &basechain->chain; + + ops = &basechain->ops; + ops->pf = family; + ops->owner = afi->owner; + ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + ops->priv = chain; + ops->hook = hookfn; + if (afi->hooks[ops->hooknum]) + ops->hook = afi->hooks[ops->hooknum]; + + chain->flags |= NFT_BASE_CHAIN; + + if (nla[NFTA_CHAIN_POLICY]) { + err = nf_tables_chain_policy(basechain, + nla[NFTA_CHAIN_POLICY]); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } else + basechain->policy = NF_ACCEPT; + + if (nla[NFTA_CHAIN_COUNTERS]) { + err = nf_tables_counters(basechain, + nla[NFTA_CHAIN_COUNTERS]); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } else { + struct nft_stats __percpu *newstats; + + newstats = alloc_percpu(struct nft_stats); + if (newstats == NULL) + return -ENOMEM; + + rcu_assign_pointer(nft_base_chain(chain)->stats, + newstats); + } + } else { + chain = kzalloc(sizeof(*chain), GFP_KERNEL); + if (chain == NULL) + return -ENOMEM; + } + + INIT_LIST_HEAD(&chain->rules); + chain->handle = nf_tables_alloc_handle(table); + chain->net = net; + chain->table = table; + nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } + list_add_tail(&chain->list, &table->chains); + table->use++; +notify: + nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, + family); + return 0; +} + +static void nf_tables_rcu_chain_destroy(struct rcu_head *head) +{ + struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); + + BUG_ON(chain->use > 0); + + if (chain->flags & NFT_BASE_CHAIN) { + free_percpu(nft_base_chain(chain)->stats); + kfree(nft_base_chain(chain)); + } else + kfree(chain); +} + +static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (!list_empty(&chain->rules)) + return -EBUSY; + + list_del(&chain->list); + table->use--; + + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, + family); + + /* Make sure all rule references are gone before this is released */ + call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy); + return 0; +} + +static void nft_ctx_init(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nft_af_info *afi, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nlattr * const *nla) +{ + ctx->net = sock_net(skb->sk); + ctx->skb = skb; + ctx->nlh = nlh; + ctx->afi = afi; + ctx->table = table; + ctx->chain = chain; + ctx->nla = nla; +} + +/* + * Expressions + */ + +/** + * nft_register_expr - register nf_tables expr type + * @ops: expr type + * + * Registers the expr type for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_expr(struct nft_expr_type *type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&type->list, &nf_tables_expressions); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_expr); + +/** + * nft_unregister_expr - unregister nf_tables expr type + * @ops: expr type + * + * Unregisters the expr typefor use with nf_tables. + */ +void nft_unregister_expr(struct nft_expr_type *type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&type->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_expr); + +static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla) +{ + const struct nft_expr_type *type; + + list_for_each_entry(type, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, type->name)) + return type; + } + return NULL; +} + +static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla) +{ + const struct nft_expr_type *type; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + type = __nft_expr_type_get(nla); + if (type != NULL && try_module_get(type->owner)) + return type; + +#ifdef CONFIG_MODULES + if (type == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-expr-%.*s", + nla_len(nla), (char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_expr_type_get(nla)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { + [NFTA_EXPR_NAME] = { .type = NLA_STRING }, + [NFTA_EXPR_DATA] = { .type = NLA_NESTED }, +}; + +static int nf_tables_fill_expr_info(struct sk_buff *skb, + const struct nft_expr *expr) +{ + if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name)) + goto nla_put_failure; + + if (expr->ops->dump) { + struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA); + if (data == NULL) + goto nla_put_failure; + if (expr->ops->dump(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, data); + } + + return skb->len; + +nla_put_failure: + return -1; +}; + +struct nft_expr_info { + const struct nft_expr_ops *ops; + struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; +}; + +static int nf_tables_expr_parse(const struct nft_ctx *ctx, + const struct nlattr *nla, + struct nft_expr_info *info) +{ + const struct nft_expr_type *type; + const struct nft_expr_ops *ops; + struct nlattr *tb[NFTA_EXPR_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + if (err < 0) + return err; + + type = nft_expr_type_get(tb[NFTA_EXPR_NAME]); + if (IS_ERR(type)) + return PTR_ERR(type); + + if (tb[NFTA_EXPR_DATA]) { + err = nla_parse_nested(info->tb, type->maxattr, + tb[NFTA_EXPR_DATA], type->policy); + if (err < 0) + goto err1; + } else + memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1)); + + if (type->select_ops != NULL) { + ops = type->select_ops(ctx, + (const struct nlattr * const *)info->tb); + if (IS_ERR(ops)) { + err = PTR_ERR(ops); + goto err1; + } + } else + ops = type->ops; + + info->ops = ops; + return 0; + +err1: + module_put(type->owner); + return err; +} + +static int nf_tables_newexpr(const struct nft_ctx *ctx, + const struct nft_expr_info *info, + struct nft_expr *expr) +{ + const struct nft_expr_ops *ops = info->ops; + int err; + + expr->ops = ops; + if (ops->init) { + err = ops->init(ctx, expr, (const struct nlattr **)info->tb); + if (err < 0) + goto err1; + } + + return 0; + +err1: + expr->ops = NULL; + return err; +} + +static void nf_tables_expr_destroy(struct nft_expr *expr) +{ + if (expr->ops->destroy) + expr->ops->destroy(expr); + module_put(expr->ops->type->owner); +} + +/* + * Rules + */ + +static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain, + u64 handle) +{ + struct nft_rule *rule; + + // FIXME: this sucks + list_for_each_entry(rule, &chain->rules, list) { + if (handle == rule->handle) + return rule; + } + + return ERR_PTR(-ENOENT); +} + +static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain, + const struct nlattr *nla) +{ + if (nla == NULL) + return ERR_PTR(-EINVAL); + + return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla))); +} + +static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { + [NFTA_RULE_TABLE] = { .type = NLA_STRING }, + [NFTA_RULE_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, + [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, + [NFTA_RULE_POSITION] = { .type = NLA_U64 }, +}; + +static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + const struct nft_expr *expr, *next; + struct nlattr *list; + const struct nft_rule *prule; + int type = event | NFNL_SUBSYS_NFTABLES << 8; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) + goto nla_put_failure; + + if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { + prule = list_entry(rule->list.prev, struct nft_rule, list); + if (nla_put_be64(skb, NFTA_RULE_POSITION, + cpu_to_be64(prule->handle))) + goto nla_put_failure; + } + + list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); + if (list == NULL) + goto nla_put_failure; + nft_rule_for_each_expr(expr, next, rule) { + struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM); + if (elem == NULL) + goto nla_put_failure; + if (nf_tables_fill_expr_info(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, elem); + } + nla_nest_end(skb, list); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_rule_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule, + int event, u32 flags, int family) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(oskb).portid; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + u32 seq = nlh->nlmsg_seq; + bool report; + int err; + + report = nlmsg_report(nlh); + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_rule_info(skb, portid, seq, event, flags, + family, table, chain, rule); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static inline bool +nft_rule_is_active(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << net->nft.gencursor)) == 0; +} + +static inline int gencursor_next(struct net *net) +{ + return net->nft.gencursor+1 == 1 ? 1 : 0; +} + +static inline int +nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << gencursor_next(net))) == 0; +} + +static inline void +nft_rule_activate_next(struct net *net, struct nft_rule *rule) +{ + /* Now inactive, will be active in the future */ + rule->genmask = (1 << net->nft.gencursor); +} + +static inline void +nft_rule_disactivate_next(struct net *net, struct nft_rule *rule) +{ + rule->genmask = (1 << gencursor_next(net)); +} + +static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) +{ + rule->genmask = 0; +} + +static int nf_tables_dump_rules(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + u8 genctr = ACCESS_ONCE(net->nft.genctr); + u8 gencursor = ACCESS_ONCE(net->nft.gencursor); + + list_for_each_entry(afi, &net->nft.af_info, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry(rule, &chain->rules, list) { + if (!nft_rule_is_active(net, rule)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, table, chain, rule) < 0) + goto done; +cont: + idx++; + } + } + } + } +done: + /* Invalidate this dump, a transition to the new generation happened */ + if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) + return -EBUSY; + + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_rules, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, + family, table, chain, rule); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static void nf_tables_rcu_rule_destroy(struct rcu_head *head) +{ + struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head); + struct nft_expr *expr; + + /* + * Careful: some expressions might not be initialized in case this + * is called on error from nf_tables_newrule(). + */ + expr = nft_expr_first(rule); + while (expr->ops && expr != nft_expr_last(rule)) { + nf_tables_expr_destroy(expr); + expr = nft_expr_next(expr); + } + kfree(rule); +} + +static void nf_tables_rule_destroy(struct nft_rule *rule) +{ + call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy); +} + +#define NFT_RULE_MAXEXPRS 128 + +static struct nft_expr_info *info; + +static struct nft_rule_trans * +nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) +{ + struct nft_rule_trans *rupd; + + rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); + if (rupd == NULL) + return NULL; + + rupd->chain = ctx->chain; + rupd->table = ctx->table; + rupd->rule = rule; + rupd->family = ctx->afi->family; + rupd->nlh = ctx->nlh; + list_add_tail(&rupd->list, &ctx->net->nft.commit_list); + + return rupd; +} + +static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); + struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *old_rule = NULL; + struct nft_rule_trans *repl = NULL; + struct nft_expr *expr; + struct nft_ctx ctx; + struct nlattr *tmp; + unsigned int size, i, n; + int err, rem; + bool create; + u64 handle, pos_handle; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); + rule = __nf_tables_rule_lookup(chain, handle); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + old_rule = rule; + else + return -EOPNOTSUPP; + } else { + if (!create || nlh->nlmsg_flags & NLM_F_REPLACE) + return -EINVAL; + handle = nf_tables_alloc_handle(table); + } + + if (nla[NFTA_RULE_POSITION]) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -EOPNOTSUPP; + + pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); + old_rule = __nf_tables_rule_lookup(chain, pos_handle); + if (IS_ERR(old_rule)) + return PTR_ERR(old_rule); + } + + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + + n = 0; + size = 0; + if (nla[NFTA_RULE_EXPRESSIONS]) { + nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { + err = -EINVAL; + if (nla_type(tmp) != NFTA_LIST_ELEM) + goto err1; + if (n == NFT_RULE_MAXEXPRS) + goto err1; + err = nf_tables_expr_parse(&ctx, tmp, &info[n]); + if (err < 0) + goto err1; + size += info[n].ops->size; + n++; + } + } + + err = -ENOMEM; + rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL); + if (rule == NULL) + goto err1; + + nft_rule_activate_next(net, rule); + + rule->handle = handle; + rule->dlen = size; + + expr = nft_expr_first(rule); + for (i = 0; i < n; i++) { + err = nf_tables_newexpr(&ctx, &info[i], expr); + if (err < 0) + goto err2; + info[i].ops = NULL; + expr = nft_expr_next(expr); + } + + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + if (nft_rule_is_active_next(net, old_rule)) { + repl = nf_tables_trans_add(old_rule, &ctx); + if (repl == NULL) { + err = -ENOMEM; + goto err2; + } + nft_rule_disactivate_next(net, old_rule); + list_add_tail(&rule->list, &old_rule->list); + } else { + err = -ENOENT; + goto err2; + } + } else if (nlh->nlmsg_flags & NLM_F_APPEND) + if (old_rule) + list_add_rcu(&rule->list, &old_rule->list); + else + list_add_tail_rcu(&rule->list, &chain->rules); + else { + if (old_rule) + list_add_tail_rcu(&rule->list, &old_rule->list); + else + list_add_rcu(&rule->list, &chain->rules); + } + + if (nf_tables_trans_add(rule, &ctx) == NULL) { + err = -ENOMEM; + goto err3; + } + return 0; + +err3: + list_del_rcu(&rule->list); + if (repl) { + list_del_rcu(&repl->rule->list); + list_del(&repl->list); + nft_rule_clear(net, repl->rule); + kfree(repl); + } +err2: + nf_tables_rule_destroy(rule); +err1: + for (i = 0; i < n; i++) { + if (info[i].ops != NULL) + module_put(info[i].ops->type->owner); + } + return err; +} + +static int +nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) +{ + /* You cannot delete the same rule twice */ + if (nft_rule_is_active_next(ctx->net, rule)) { + if (nf_tables_trans_add(rule, ctx) == NULL) + return -ENOMEM; + nft_rule_disactivate_next(ctx->net, rule); + return 0; + } + return -ENOENT; +} + +static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); + const struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *tmp; + int family = nfmsg->nfgen_family, err = 0; + struct nft_ctx ctx; + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + err = nf_tables_delrule_one(&ctx, rule); + } else { + /* Remove all rules in this chain */ + list_for_each_entry_safe(rule, tmp, &chain->rules, list) { + err = nf_tables_delrule_one(&ctx, rule); + if (err < 0) + break; + } + } + + return err; +} + +static int nf_tables_commit(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; + + /* Bump generation counter, invalidate any dump in progress */ + net->nft.genctr++; + + /* A new generation has just started */ + net->nft.gencursor = gencursor_next(net); + + /* Make sure all packets have left the previous generation before + * purging old rules. + */ + synchronize_rcu(); + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete this rule from the dirty list */ + list_del(&rupd->list); + + /* This rule was inactive in the past and just became active. + * Clear the next bit of the genmask since its meaning has + * changed, now it is the future. + */ + if (nft_rule_is_active(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, + rupd->chain, rupd->rule, + NFT_MSG_NEWRULE, 0, + rupd->family); + kfree(rupd); + continue; + } + + /* This rule is in the past, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, + rupd->rule, NFT_MSG_DELRULE, 0, + rupd->family); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); + } + + return 0; +} + +static int nf_tables_abort(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete all rules from the dirty list */ + list_del(&rupd->list); + + if (!nft_rule_is_active_next(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + kfree(rupd); + continue; + } + + /* This rule is inactive, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); + } + return 0; +} + +/* + * Sets + */ + +static LIST_HEAD(nf_tables_set_ops); + +int nft_register_set(struct nft_set_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&ops->list, &nf_tables_set_ops); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_set); + +void nft_unregister_set(struct nft_set_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&ops->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_set); + +static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[]) +{ + const struct nft_set_ops *ops; + u32 features; + +#ifdef CONFIG_MODULES + if (list_empty(&nf_tables_set_ops)) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-set"); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (!list_empty(&nf_tables_set_ops)) + return ERR_PTR(-EAGAIN); + } +#endif + features = 0; + if (nla[NFTA_SET_FLAGS] != NULL) { + features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); + features &= NFT_SET_INTERVAL | NFT_SET_MAP; + } + + // FIXME: implement selection properly + list_for_each_entry(ops, &nf_tables_set_ops, list) { + if ((ops->features & features) != features) + continue; + if (!try_module_get(ops->owner)) + continue; + return ops; + } + + return ERR_PTR(-EOPNOTSUPP); +} + +static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { + [NFTA_SET_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_NAME] = { .type = NLA_STRING }, + [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, + [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, +}; + +static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct net *net = sock_net(skb->sk); + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table = NULL; + + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + if (nla[NFTA_SET_TABLE] != NULL) { + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + } + + nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); + return 0; +} + +struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla) +{ + struct nft_set *set; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(set, &table->sets, list) { + if (!nla_strcmp(nla, set->name)) + return set; + } + return ERR_PTR(-ENOENT); +} + +static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, + const char *name) +{ + const struct nft_set *i; + const char *p; + unsigned long *inuse; + unsigned int n = 0; + + p = strnchr(name, IFNAMSIZ, '%'); + if (p != NULL) { + if (p[1] != 'd' || strchr(p + 2, '%')) + return -EINVAL; + + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (inuse == NULL) + return -ENOMEM; + + list_for_each_entry(i, &ctx->table->sets, list) { + if (!sscanf(i->name, name, &n)) + continue; + if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE) + continue; + set_bit(n, inuse); + } + + n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); + free_page((unsigned long)inuse); + } + + snprintf(set->name, sizeof(set->name), name, n); + list_for_each_entry(i, &ctx->table->sets, list) { + if (!strcmp(set->name, i->name)) + return -ENFILE; + } + return 0; +} + +static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, + const struct nft_set *set, u16 event, u16 flags) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + u32 portid = NETLINK_CB(ctx->skb).portid; + u32 seq = ctx->nlh->nlmsg_seq; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = ctx->afi->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_NAME, set->name)) + goto nla_put_failure; + if (set->flags != 0) + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen))) + goto nla_put_failure; + if (set->flags & NFT_SET_MAP) { + if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) + goto nla_put_failure; + } + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_set_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + int event) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(ctx->skb).portid; + bool report; + int err; + + report = nlmsg_report(ctx->nlh); + if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_set(skb, ctx, set, event, 0); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx = 0, s_idx = cb->args[0]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + goto done; + } +cont: + idx++; + } + cb->args[1] = 1; +done: + return skb->len; +} + +static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx = 0, s_idx = cb->args[0]; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(table, &ctx->afi->tables, list) { + if (cur_table && cur_table != table) + continue; + + ctx->table = table; + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + goto done; + } +cont: + idx++; + } + } + cb->args[1] = 1; +done: + return skb->len; +} + +static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nlattr *nla[NFTA_SET_MAX + 1]; + struct nft_ctx ctx; + int err, ret; + + err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX, + nft_set_policy); + if (err < 0) + return err; + + err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla); + if (err < 0) + return err; + + if (ctx.table == NULL) + ret = nf_tables_dump_sets_all(&ctx, skb, cb); + else + ret = nf_tables_dump_sets_table(&ctx, skb, cb); + + return ret; +} + +static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nft_set *set; + struct nft_ctx ctx; + struct sk_buff *skb2; + int err; + + /* Verify existance before starting dump */ + err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_sets, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) + return PTR_ERR(set); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_set_ops *ops; + const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); + struct nft_table *table; + struct nft_set *set; + struct nft_ctx ctx; + char name[IFNAMSIZ]; + unsigned int size; + bool create; + u32 ktype, klen, dlen, dtype, flags; + int err; + + if (nla[NFTA_SET_TABLE] == NULL || + nla[NFTA_SET_NAME] == NULL || + nla[NFTA_SET_KEY_LEN] == NULL) + return -EINVAL; + + ktype = NFT_DATA_VALUE; + if (nla[NFTA_SET_KEY_TYPE] != NULL) { + ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); + if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + return -EINVAL; + } + + klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + flags = 0; + if (nla[NFTA_SET_FLAGS] != NULL) { + flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); + if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | + NFT_SET_INTERVAL | NFT_SET_MAP)) + return -EINVAL; + } + + dtype = 0; + dlen = 0; + if (nla[NFTA_SET_DATA_TYPE] != NULL) { + if (!(flags & NFT_SET_MAP)) + return -EINVAL; + + dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); + if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && + dtype != NFT_DATA_VERDICT) + return -EINVAL; + + if (dtype != NFT_DATA_VERDICT) { + if (nla[NFTA_SET_DATA_LEN] == NULL) + return -EINVAL; + dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); + if (dlen == 0 || + dlen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + } else + dlen = sizeof(struct nft_data); + } else if (flags & NFT_SET_MAP) + return -EINVAL; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) { + if (PTR_ERR(set) != -ENOENT) + return PTR_ERR(set); + set = NULL; + } + + if (set != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + return 0; + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENOENT; + + ops = nft_select_set_ops(nla); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + size = 0; + if (ops->privsize != NULL) + size = ops->privsize(nla); + + err = -ENOMEM; + set = kzalloc(sizeof(*set) + size, GFP_KERNEL); + if (set == NULL) + goto err1; + + nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name)); + err = nf_tables_set_alloc_name(&ctx, set, name); + if (err < 0) + goto err2; + + INIT_LIST_HEAD(&set->bindings); + set->ops = ops; + set->ktype = ktype; + set->klen = klen; + set->dtype = dtype; + set->dlen = dlen; + set->flags = flags; + + err = ops->init(set, nla); + if (err < 0) + goto err2; + + list_add_tail(&set->list, &table->sets); + nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET); + return 0; + +err2: + kfree(set); +err1: + module_put(ops->owner); + return err; +} + +static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +{ + list_del(&set->list); + if (!(set->flags & NFT_SET_ANONYMOUS)) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); + + set->ops->destroy(set); + module_put(set->ops->owner); + kfree(set); +} + +static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct nft_set *set; + struct nft_ctx ctx; + int err; + + if (nla[NFTA_SET_TABLE] == NULL) + return -EINVAL; + + err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings)) + return -EBUSY; + + nf_tables_set_destroy(&ctx, set); + return 0; +} + +static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + enum nft_registers dreg; + + dreg = nft_type_to_reg(set->dtype); + return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype); +} + +int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding) +{ + struct nft_set_binding *i; + struct nft_set_iter iter; + + if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + return -EBUSY; + + if (set->flags & NFT_SET_MAP) { + /* If the set is already bound to the same chain all + * jumps are already validated for that chain. + */ + list_for_each_entry(i, &set->bindings, list) { + if (i->chain == binding->chain) + goto bind; + } + + iter.skip = 0; + iter.count = 0; + iter.err = 0; + iter.fn = nf_tables_bind_check_setelem; + + set->ops->walk(ctx, set, &iter); + if (iter.err < 0) { + /* Destroy anonymous sets if binding fails */ + if (set->flags & NFT_SET_ANONYMOUS) + nf_tables_set_destroy(ctx, set); + + return iter.err; + } + } +bind: + binding->chain = ctx->chain; + list_add_tail(&binding->list, &set->bindings); + return 0; +} + +void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding) +{ + list_del(&binding->list); + + if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + nf_tables_set_destroy(ctx, set); +} + +/* + * Set elements + */ + +static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { + [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct net *net = sock_net(skb->sk); + + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); + if (IS_ERR(table)) + return PTR_ERR(table); + + nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); + return 0; +} + +static int nf_tables_fill_setelem(struct sk_buff *skb, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, + set->klen) < 0) + goto nla_put_failure; + + if (set->flags & NFT_SET_MAP && + !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, + set->dlen) < 0) + goto nla_put_failure; + + if (elem->flags != 0) + if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nlmsg_trim(skb, b); + return -EMSGSIZE; +} + +struct nft_set_dump_args { + const struct netlink_callback *cb; + struct nft_set_iter iter; + struct sk_buff *skb; +}; + +static int nf_tables_dump_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + struct nft_set_dump_args *args; + + args = container_of(iter, struct nft_set_dump_args, iter); + return nf_tables_fill_setelem(args->skb, set, elem); +} + +static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nft_set *set; + struct nft_set_dump_args args; + struct nft_ctx ctx; + struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct nlattr *nest; + u32 portid, seq; + int event, err; + + nfmsg = nlmsg_data(cb->nlh); + err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX, + nft_set_elem_list_policy); + if (err < 0) + return err; + + err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + event = NFT_MSG_NEWSETELEM; + event |= NFNL_SUBSYS_NFTABLES << 8; + portid = NETLINK_CB(cb->skb).portid; + seq = cb->nlh->nlmsg_seq; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + NLM_F_MULTI); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = NFPROTO_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + if (nest == NULL) + goto nla_put_failure; + + args.cb = cb; + args.skb = skb; + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; + set->ops->walk(&ctx, set, &args.iter); + + nla_nest_end(skb, nest); + nlmsg_end(skb, nlh); + + if (args.iter.err && args.iter.err != -EMSGSIZE) + return args.iter.err; + if (args.iter.count == cb->args[0]) + return 0; + + cb->args[0] = args.iter.count; + return skb->len; + +nla_put_failure: + return -ENOSPC; +} + +static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nft_set *set; + struct nft_ctx ctx; + int err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_set, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + return -EOPNOTSUPP; +} + +static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_data_desc d1, d2; + struct nft_set_elem elem; + struct nft_set_binding *binding; + enum nft_registers dreg; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy); + if (err < 0) + return err; + + if (nla[NFTA_SET_ELEM_KEY] == NULL) + return -EINVAL; + + elem.flags = 0; + if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { + elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + } + + if (set->flags & NFT_SET_MAP) { + if (nla[NFTA_SET_ELEM_DATA] == NULL && + !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + return -EINVAL; + } else { + if (nla[NFTA_SET_ELEM_DATA] != NULL) + return -EINVAL; + } + + err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) + goto err2; + + err = -EEXIST; + if (set->ops->get(set, &elem) == 0) + goto err2; + + if (nla[NFTA_SET_ELEM_DATA] != NULL) { + err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + if (err < 0) + goto err2; + + err = -EINVAL; + if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen) + goto err3; + + dreg = nft_type_to_reg(set->dtype); + list_for_each_entry(binding, &set->bindings, list) { + struct nft_ctx bind_ctx = { + .afi = ctx->afi, + .table = ctx->table, + .chain = binding->chain, + }; + + err = nft_validate_data_load(&bind_ctx, dreg, + &elem.data, d2.type); + if (err < 0) + goto err3; + } + } + + err = set->ops->insert(set, &elem); + if (err < 0) + goto err3; + + return 0; + +err3: + if (nla[NFTA_SET_ELEM_DATA] != NULL) + nft_data_uninit(&elem.data, d2.type); +err2: + nft_data_uninit(&elem.key, d1.type); +err1: + return err; +} + +static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nlattr *attr; + struct nft_set *set; + struct nft_ctx ctx; + int rem, err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_add_set_elem(&ctx, set, attr); + if (err < 0) + return err; + } + return 0; +} + +static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_data_desc desc; + struct nft_set_elem elem; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy); + if (err < 0) + goto err1; + + err = -EINVAL; + if (nla[NFTA_SET_ELEM_KEY] == NULL) + goto err1; + + err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + goto err1; + + err = -EINVAL; + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) + goto err2; + + err = set->ops->get(set, &elem); + if (err < 0) + goto err2; + + set->ops->remove(set, &elem); + + nft_data_uninit(&elem.key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(&elem.data, set->dtype); + +err2: + nft_data_uninit(&elem.key, desc.type); +err1: + return err; +} + +static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nlattr *attr; + struct nft_set *set; + struct nft_ctx ctx; + int rem, err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_del_setelem(&ctx, set, attr); + if (err < 0) + return err; + } + return 0; +} + +static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { + [NFT_MSG_NEWTABLE] = { + .call = nf_tables_newtable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_GETTABLE] = { + .call = nf_tables_gettable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_DELTABLE] = { + .call = nf_tables_deltable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_NEWCHAIN] = { + .call = nf_tables_newchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_GETCHAIN] = { + .call = nf_tables_getchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_DELCHAIN] = { + .call = nf_tables_delchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_NEWRULE] = { + .call_batch = nf_tables_newrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_GETRULE] = { + .call = nf_tables_getrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_DELRULE] = { + .call_batch = nf_tables_delrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_NEWSET] = { + .call = nf_tables_newset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_GETSET] = { + .call = nf_tables_getset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_DELSET] = { + .call = nf_tables_delset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_NEWSETELEM] = { + .call = nf_tables_newsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, + [NFT_MSG_GETSETELEM] = { + .call = nf_tables_getsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, + [NFT_MSG_DELSETELEM] = { + .call = nf_tables_delsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, +}; + +static const struct nfnetlink_subsystem nf_tables_subsys = { + .name = "nf_tables", + .subsys_id = NFNL_SUBSYS_NFTABLES, + .cb_count = NFT_MSG_MAX, + .cb = nf_tables_cb, + .commit = nf_tables_commit, + .abort = nf_tables_abort, +}; + +/* + * Loop detection - walk through the ruleset beginning at the destination chain + * of a new jump until either the source chain is reached (loop) or all + * reachable chains have been traversed. + * + * The loop check is performed whenever a new jump verdict is added to an + * expression or verdict map or a verdict map is bound to a new chain. + */ + +static int nf_tables_check_loops(const struct nft_ctx *ctx, + const struct nft_chain *chain); + +static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + switch (elem->data.verdict) { + case NFT_JUMP: + case NFT_GOTO: + return nf_tables_check_loops(ctx, elem->data.chain); + default: + return 0; + } +} + +static int nf_tables_check_loops(const struct nft_ctx *ctx, + const struct nft_chain *chain) +{ + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + const struct nft_set *set; + struct nft_set_binding *binding; + struct nft_set_iter iter; + + if (ctx->chain == chain) + return -ELOOP; + + list_for_each_entry(rule, &chain->rules, list) { + nft_rule_for_each_expr(expr, last, rule) { + const struct nft_data *data = NULL; + int err; + + if (!expr->ops->validate) + continue; + + err = expr->ops->validate(ctx, expr, &data); + if (err < 0) + return err; + + if (data == NULL) + continue; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + err = nf_tables_check_loops(ctx, data->chain); + if (err < 0) + return err; + default: + break; + } + } + } + + list_for_each_entry(set, &ctx->table->sets, list) { + if (!(set->flags & NFT_SET_MAP) || + set->dtype != NFT_DATA_VERDICT) + continue; + + list_for_each_entry(binding, &set->bindings, list) { + if (binding->chain != chain) + continue; + + iter.skip = 0; + iter.count = 0; + iter.err = 0; + iter.fn = nf_tables_loop_check_setelem; + + set->ops->walk(ctx, set, &iter); + if (iter.err < 0) + return iter.err; + } + } + + return 0; +} + +/** + * nft_validate_input_register - validate an expressions' input register + * + * @reg: the register number + * + * Validate that the input register is one of the general purpose + * registers. + */ +int nft_validate_input_register(enum nft_registers reg) +{ + if (reg <= NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_input_register); + +/** + * nft_validate_output_register - validate an expressions' output register + * + * @reg: the register number + * + * Validate that the output register is one of the general purpose + * registers or the verdict register. + */ +int nft_validate_output_register(enum nft_registers reg) +{ + if (reg < NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_output_register); + +/** + * nft_validate_data_load - validate an expressions' data load + * + * @ctx: context of the expression performing the load + * @reg: the destination register number + * @data: the data to load + * @type: the data type + * + * Validate that a data load uses the appropriate data type for + * the destination register. A value of NULL for the data means + * that its runtime gathered data, which is always of type + * NFT_DATA_VALUE. + */ +int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type) +{ + int err; + + switch (reg) { + case NFT_REG_VERDICT: + if (data == NULL || type != NFT_DATA_VERDICT) + return -EINVAL; + + if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) { + err = nf_tables_check_loops(ctx, data->chain); + if (err < 0) + return err; + + if (ctx->chain->level + 1 > data->chain->level) { + if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE) + return -EMLINK; + data->chain->level = ctx->chain->level + 1; + } + } + + return 0; + default: + if (data != NULL && type != NFT_DATA_VALUE) + return -EINVAL; + return 0; + } +} +EXPORT_SYMBOL_GPL(nft_validate_data_load); + +static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { + [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, + [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, +}; + +static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_VERDICT_MAX + 1]; + struct nft_chain *chain; + int err; + + err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy); + if (err < 0) + return err; + + if (!tb[NFTA_VERDICT_CODE]) + return -EINVAL; + data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + + switch (data->verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + case NFT_CONTINUE: + case NFT_BREAK: + case NFT_RETURN: + desc->len = sizeof(data->verdict); + break; + case NFT_JUMP: + case NFT_GOTO: + if (!tb[NFTA_VERDICT_CHAIN]) + return -EINVAL; + chain = nf_tables_chain_lookup(ctx->table, + tb[NFTA_VERDICT_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + if (chain->flags & NFT_BASE_CHAIN) + return -EOPNOTSUPP; + + chain->use++; + data->chain = chain; + desc->len = sizeof(data); + break; + default: + return -EINVAL; + } + + desc->type = NFT_DATA_VERDICT; + return 0; +} + +static void nft_verdict_uninit(const struct nft_data *data) +{ + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + data->chain->use--; + break; + } +} + +static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + if (!nest) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict))) + goto nla_put_failure; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + unsigned int len; + + len = nla_len(nla); + if (len == 0) + return -EINVAL; + if (len > sizeof(data->data)) + return -EOVERFLOW; + + nla_memcpy(data->data, nla, sizeof(data->data)); + desc->type = NFT_DATA_VALUE; + desc->len = len; + return 0; +} + +static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data, + unsigned int len) +{ + return nla_put(skb, NFTA_DATA_VALUE, len, data->data); +} + +static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { + [NFTA_DATA_VALUE] = { .type = NLA_BINARY, + .len = FIELD_SIZEOF(struct nft_data, data) }, + [NFTA_DATA_VERDICT] = { .type = NLA_NESTED }, +}; + +/** + * nft_data_init - parse nf_tables data netlink attributes + * + * @ctx: context of the expression using the data + * @data: destination struct nft_data + * @desc: data description + * @nla: netlink attribute containing data + * + * Parse the netlink data attributes and initialize a struct nft_data. + * The type and length of data are returned in the data description. + * + * The caller can indicate that it only wants to accept data of type + * NFT_DATA_VALUE by passing NULL for the ctx argument. + */ +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_DATA_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy); + if (err < 0) + return err; + + if (tb[NFTA_DATA_VALUE]) + return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + if (tb[NFTA_DATA_VERDICT] && ctx != NULL) + return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(nft_data_init); + +/** + * nft_data_uninit - release a nft_data item + * + * @data: struct nft_data to release + * @type: type of data + * + * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * all others need to be released by calling this function. + */ +void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +{ + switch (type) { + case NFT_DATA_VALUE: + return; + case NFT_DATA_VERDICT: + return nft_verdict_uninit(data); + default: + WARN_ON(1); + } +} +EXPORT_SYMBOL_GPL(nft_data_uninit); + +int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + struct nlattr *nest; + int err; + + nest = nla_nest_start(skb, attr); + if (nest == NULL) + return -1; + + switch (type) { + case NFT_DATA_VALUE: + err = nft_value_dump(skb, data, len); + break; + case NFT_DATA_VERDICT: + err = nft_verdict_dump(skb, data); + break; + default: + err = -EINVAL; + WARN_ON(1); + } + + nla_nest_end(skb, nest); + return err; +} +EXPORT_SYMBOL_GPL(nft_data_dump); + +static int nf_tables_init_net(struct net *net) +{ + INIT_LIST_HEAD(&net->nft.af_info); + INIT_LIST_HEAD(&net->nft.commit_list); + return 0; +} + +static struct pernet_operations nf_tables_net_ops = { + .init = nf_tables_init_net, +}; + +static int __init nf_tables_module_init(void) +{ + int err; + + info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS, + GFP_KERNEL); + if (info == NULL) { + err = -ENOMEM; + goto err1; + } + + err = nf_tables_core_module_init(); + if (err < 0) + goto err2; + + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err3; + + pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n"); + return register_pernet_subsys(&nf_tables_net_ops); +err3: + nf_tables_core_module_exit(); +err2: + kfree(info); +err1: + return err; +} + +static void __exit nf_tables_module_exit(void) +{ + unregister_pernet_subsys(&nf_tables_net_ops); + nfnetlink_subsys_unregister(&nf_tables_subsys); + nf_tables_core_module_exit(); + kfree(info); +} + +module_init(nf_tables_module_init); +module_exit(nf_tables_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c new file mode 100644 index 000000000000..cb9e685caae1 --- /dev/null +++ b/net/netfilter/nf_tables_core.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_log.h> + +static void nft_cmp_fast_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1]) +{ + const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + u32 mask; + + mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len); + if ((data[priv->sreg].data[0] & mask) == priv->data) + return; + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static bool nft_payload_fast_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + unsigned char *ptr; + + if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) + ptr = skb_network_header(skb); + else + ptr = skb_network_header(skb) + pkt->xt.thoff; + + ptr += priv->offset; + + if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) + return false; + + if (priv->len == 2) + *(u16 *)dest->data = *(u16 *)ptr; + else if (priv->len == 4) + *(u32 *)dest->data = *(u32 *)ptr; + else + *(u8 *)dest->data = *(u8 *)ptr; + return true; +} + +struct nft_jumpstack { + const struct nft_chain *chain; + const struct nft_rule *rule; + int rulenum; +}; + +static inline void +nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, + struct nft_jumpstack *jumpstack, unsigned int stackptr) +{ + struct nft_stats __percpu *stats; + const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this; + + rcu_read_lock_bh(); + stats = rcu_dereference(nft_base_chain(chain)->stats); + __this_cpu_inc(stats->pkts); + __this_cpu_add(stats->bytes, pkt->skb->len); + rcu_read_unlock_bh(); +} + +enum nft_trace { + NFT_TRACE_RULE, + NFT_TRACE_RETURN, + NFT_TRACE_POLICY, +}; + +static const char *const comments[] = { + [NFT_TRACE_RULE] = "rule", + [NFT_TRACE_RETURN] = "return", + [NFT_TRACE_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static inline void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); +} + +unsigned int +nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) +{ + const struct nft_chain *chain = ops->priv; + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + struct nft_data data[NFT_REG_MAX + 1]; + unsigned int stackptr = 0; + struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + int rulenum = 0; + /* + * Cache cursor to avoid problems in case that the cursor is updated + * while traversing the ruleset. + */ + unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); + +do_chain: + rule = list_entry(&chain->rules, struct nft_rule, list); +next_rule: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + + /* This rule is not active, skip. */ + if (unlikely(rule->genmask & (1 << gencursor))) + continue; + + rulenum++; + + nft_rule_for_each_expr(expr, last, rule) { + if (expr->ops == &nft_cmp_fast_ops) + nft_cmp_fast_eval(expr, data); + else if (expr->ops != &nft_payload_fast_ops || + !nft_payload_fast_eval(expr, data, pkt)) + expr->ops->eval(expr, data, pkt); + + if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NFT_BREAK: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + /* fall through */ + case NFT_CONTINUE: + continue; + } + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + + return data[NFT_REG_VERDICT].verdict; + case NFT_JUMP: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + + BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); + jumpstack[stackptr].chain = chain; + jumpstack[stackptr].rule = rule; + jumpstack[stackptr].rulenum = rulenum; + stackptr++; + /* fall through */ + case NFT_GOTO: + chain = data[NFT_REG_VERDICT].chain; + goto do_chain; + case NFT_RETURN: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + + /* fall through */ + case NFT_CONTINUE: + break; + default: + WARN_ON(1); + } + + if (stackptr > 0) { + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); + + stackptr--; + chain = jumpstack[stackptr].chain; + rule = jumpstack[stackptr].rule; + rulenum = jumpstack[stackptr].rulenum; + goto next_rule; + } + nft_chain_stats(chain, pkt, jumpstack, stackptr); + + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY); + + return nft_base_chain(chain)->policy; +} +EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); + +int __init nf_tables_core_module_init(void) +{ + int err; + + err = nft_immediate_module_init(); + if (err < 0) + goto err1; + + err = nft_cmp_module_init(); + if (err < 0) + goto err2; + + err = nft_lookup_module_init(); + if (err < 0) + goto err3; + + err = nft_bitwise_module_init(); + if (err < 0) + goto err4; + + err = nft_byteorder_module_init(); + if (err < 0) + goto err5; + + err = nft_payload_module_init(); + if (err < 0) + goto err6; + + return 0; + +err6: + nft_byteorder_module_exit(); +err5: + nft_bitwise_module_exit(); +err4: + nft_lookup_module_exit(); +err3: + nft_cmp_module_exit(); +err2: + nft_immediate_module_exit(); +err1: + return err; +} + +void nf_tables_core_module_exit(void) +{ + nft_payload_module_exit(); + nft_byteorder_module_exit(); + nft_bitwise_module_exit(); + nft_lookup_module_exit(); + nft_cmp_module_exit(); + nft_immediate_module_exit(); +} diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 572d87dc116f..027f16af51a0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) const struct nfnetlink_subsystem *ss; int type, err; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - /* All the messages must at least contain nfgenmsg */ if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; @@ -217,9 +214,179 @@ replay: } } +static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, + u_int16_t subsys_id) +{ + struct sk_buff *nskb, *oskb = skb; + struct net *net = sock_net(skb->sk); + const struct nfnetlink_subsystem *ss; + const struct nfnl_callback *nc; + bool success = true, done = false; + int err; + + if (subsys_id >= NFNL_SUBSYS_COUNT) + return netlink_ack(skb, nlh, -EINVAL); +replay: + nskb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!nskb) + return netlink_ack(oskb, nlh, -ENOMEM); + + nskb->sk = oskb->sk; + skb = nskb; + + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) { +#ifdef CONFIG_MODULES + nfnl_unlock(subsys_id); + request_module("nfnetlink-subsys-%d", subsys_id); + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) +#endif + { + nfnl_unlock(subsys_id); + kfree_skb(nskb); + return netlink_ack(skb, nlh, -EOPNOTSUPP); + } + } + + if (!ss->commit || !ss->abort) { + nfnl_unlock(subsys_id); + kfree_skb(nskb); + return netlink_ack(skb, nlh, -EOPNOTSUPP); + } + + while (skb->len >= nlmsg_total_size(0)) { + int msglen, type; + + nlh = nlmsg_hdr(skb); + err = 0; + + if (nlh->nlmsg_len < NLMSG_HDRLEN) { + err = -EINVAL; + goto ack; + } + + /* Only requests are handled by the kernel */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + err = -EINVAL; + goto ack; + } + + type = nlh->nlmsg_type; + if (type == NFNL_MSG_BATCH_BEGIN) { + /* Malformed: Batch begin twice */ + success = false; + goto done; + } else if (type == NFNL_MSG_BATCH_END) { + done = true; + goto done; + } else if (type < NLMSG_MIN_TYPE) { + err = -EINVAL; + goto ack; + } + + /* We only accept a batch with messages for the same + * subsystem. + */ + if (NFNL_SUBSYS_ID(type) != subsys_id) { + err = -EINVAL; + goto ack; + } + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + err = -EINVAL; + goto ack; + } + + { + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) + goto ack; + + if (nc->call_batch) { + err = nc->call_batch(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + } + + /* The lock was released to autoload some module, we + * have to abort and start from scratch using the + * original skb. + */ + if (err == -EAGAIN) { + ss->abort(skb); + nfnl_unlock(subsys_id); + kfree_skb(nskb); + goto replay; + } + } +ack: + if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* We don't stop processing the batch on errors, thus, + * userspace gets all the errors that the batch + * triggers. + */ + netlink_ack(skb, nlh, err); + if (err) + success = false; + } + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); + } +done: + if (success && done) + ss->commit(skb); + else + ss->abort(skb); + + nfnl_unlock(subsys_id); + kfree_skb(nskb); +} + static void nfnetlink_rcv(struct sk_buff *skb) { - netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct net *net = sock_net(skb->sk); + int msglen; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return netlink_ack(skb, nlh, -EPERM); + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len) + return; + + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { + struct nfgenmsg *nfgenmsg; + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + return; + + nfgenmsg = nlmsg_data(nlh); + skb_pull(skb, msglen); + nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); + } else { + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + } } #ifdef CONFIG_MODULES diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c new file mode 100644 index 000000000000..4fb6ee2c1106 --- /dev/null +++ b/net/netfilter/nft_bitwise.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +struct nft_bitwise { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 len; + struct nft_data mask; + struct nft_data xor; +}; + +static void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + const struct nft_data *src = &data[priv->sreg]; + struct nft_data *dst = &data[priv->dreg]; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) { + dst->data[i] = (src->data[i] & priv->mask.data[i]) ^ + priv->xor.data[i]; + } +} + +static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { + [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, + [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, + [NFTA_BITWISE_XOR] = { .type = NLA_NESTED }, +}; + +static int nft_bitwise_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_bitwise *priv = nft_expr_priv(expr); + struct nft_data_desc d1, d2; + int err; + + if (tb[NFTA_BITWISE_SREG] == NULL || + tb[NFTA_BITWISE_DREG] == NULL || + tb[NFTA_BITWISE_LEN] == NULL || + tb[NFTA_BITWISE_MASK] == NULL || + tb[NFTA_BITWISE_XOR] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + + err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]); + if (err < 0) + return err; + if (d1.len != priv->len) + return -EINVAL; + + err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]); + if (err < 0) + return err; + if (d2.len != priv->len) + return -EINVAL; + + return 0; +} + +static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_bitwise_type; +static const struct nft_expr_ops nft_bitwise_ops = { + .type = &nft_bitwise_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), + .eval = nft_bitwise_eval, + .init = nft_bitwise_init, + .dump = nft_bitwise_dump, +}; + +static struct nft_expr_type nft_bitwise_type __read_mostly = { + .name = "bitwise", + .ops = &nft_bitwise_ops, + .policy = nft_bitwise_policy, + .maxattr = NFTA_BITWISE_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_bitwise_module_init(void) +{ + return nft_register_expr(&nft_bitwise_type); +} + +void nft_bitwise_module_exit(void) +{ + nft_unregister_expr(&nft_bitwise_type); +} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c new file mode 100644 index 000000000000..c39ed8d29df1 --- /dev/null +++ b/net/netfilter/nft_byteorder.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +struct nft_byteorder { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + enum nft_byteorder_ops op:8; + u8 len; + u8 size; +}; + +static void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg]; + union { u32 u32; u16 u16; } *s, *d; + unsigned int i; + + s = (void *)src->data; + d = (void *)dst->data; + + switch (priv->size) { + case 4: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = ntohl((__force __be32)s[i].u32); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = (__force __u32)htonl(s[i].u32); + break; + } + break; + case 2: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = ntohs((__force __be16)s[i].u16); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = (__force __u16)htons(s[i].u16); + break; + } + break; + } +} + +static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { + [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_OP] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 }, +}; + +static int nft_byteorder_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_BYTEORDER_SREG] == NULL || + tb[NFTA_BYTEORDER_DREG] == NULL || + tb[NFTA_BYTEORDER_LEN] == NULL || + tb[NFTA_BYTEORDER_SIZE] == NULL || + tb[NFTA_BYTEORDER_OP] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + case NFT_BYTEORDER_HTON: + break; + default: + return -EINVAL; + } + + priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + switch (priv->size) { + case 2: + case 4: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_byteorder_type; +static const struct nft_expr_ops nft_byteorder_ops = { + .type = &nft_byteorder_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), + .eval = nft_byteorder_eval, + .init = nft_byteorder_init, + .dump = nft_byteorder_dump, +}; + +static struct nft_expr_type nft_byteorder_type __read_mostly = { + .name = "byteorder", + .ops = &nft_byteorder_ops, + .policy = nft_byteorder_policy, + .maxattr = NFTA_BYTEORDER_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_byteorder_module_init(void) +{ + return nft_register_expr(&nft_byteorder_type); +} + +void nft_byteorder_module_exit(void) +{ + nft_unregister_expr(&nft_byteorder_type); +} diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c new file mode 100644 index 000000000000..954925db414d --- /dev/null +++ b/net/netfilter/nft_cmp.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +struct nft_cmp_expr { + struct nft_data data; + enum nft_registers sreg:8; + u8 len; + enum nft_cmp_ops op:8; +}; + +static void nft_cmp_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + int d; + + d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len); + switch (priv->op) { + case NFT_CMP_EQ: + if (d != 0) + goto mismatch; + break; + case NFT_CMP_NEQ: + if (d == 0) + goto mismatch; + break; + case NFT_CMP_LT: + if (d == 0) + goto mismatch; + case NFT_CMP_LTE: + if (d > 0) + goto mismatch; + break; + case NFT_CMP_GT: + if (d == 0) + goto mismatch; + case NFT_CMP_GTE: + if (d < 0) + goto mismatch; + break; + } + return; + +mismatch: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { + [NFTA_CMP_SREG] = { .type = NLA_U32 }, + [NFTA_CMP_OP] = { .type = NLA_U32 }, + [NFTA_CMP_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); + BUG_ON(err < 0); + + priv->len = desc.len; + return 0; +} + +static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_cmp_type; +static const struct nft_expr_ops nft_cmp_ops = { + .type = &nft_cmp_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), + .eval = nft_cmp_eval, + .init = nft_cmp_init, + .dump = nft_cmp_dump, +}; + +static int nft_cmp_fast_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + struct nft_data data; + u32 mask; + int err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + BUG_ON(err < 0); + desc.len *= BITS_PER_BYTE; + + mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); + priv->data = data.data[0] & mask; + priv->len = desc.len; + return 0; +} + +static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + struct nft_data data; + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ))) + goto nla_put_failure; + + data.data[0] = priv->data; + if (nft_data_dump(skb, NFTA_CMP_DATA, &data, + NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +const struct nft_expr_ops nft_cmp_fast_ops = { + .type = &nft_cmp_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)), + .eval = NULL, /* inlined */ + .init = nft_cmp_fast_init, + .dump = nft_cmp_fast_dump, +}; + +static const struct nft_expr_ops * +nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) +{ + struct nft_data_desc desc; + struct nft_data data; + enum nft_registers sreg; + enum nft_cmp_ops op; + int err; + + if (tb[NFTA_CMP_SREG] == NULL || + tb[NFTA_CMP_OP] == NULL || + tb[NFTA_CMP_DATA] == NULL) + return ERR_PTR(-EINVAL); + + sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + err = nft_validate_input_register(sreg); + if (err < 0) + return ERR_PTR(err); + + op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + switch (op) { + case NFT_CMP_EQ: + case NFT_CMP_NEQ: + case NFT_CMP_LT: + case NFT_CMP_LTE: + case NFT_CMP_GT: + case NFT_CMP_GTE: + break; + default: + return ERR_PTR(-EINVAL); + } + + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + if (err < 0) + return ERR_PTR(err); + + if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) + return &nft_cmp_fast_ops; + else + return &nft_cmp_ops; +} + +static struct nft_expr_type nft_cmp_type __read_mostly = { + .name = "cmp", + .select_ops = nft_cmp_select_ops, + .policy = nft_cmp_policy, + .maxattr = NFTA_CMP_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_cmp_module_init(void) +{ + return nft_register_expr(&nft_cmp_type); +} + +void nft_cmp_module_exit(void) +{ + nft_unregister_expr(&nft_cmp_type); +} diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c new file mode 100644 index 000000000000..4811f762e060 --- /dev/null +++ b/net/netfilter/nft_compat.c @@ -0,0 +1,768 @@ +/* + * (C) 2012-2013 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This software has been sponsored by Sophos Astaro <http://www.sophos.com> + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <linux/netfilter/nf_tables_compat.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <asm/uaccess.h> /* for set_fs */ +#include <net/netfilter/nf_tables.h> + +union nft_entry { + struct ipt_entry e4; + struct ip6t_entry e6; +}; + +static inline void +nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info) +{ + par->target = xt; + par->targinfo = xt_info; + par->hotdrop = false; +} + +static void nft_target_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct sk_buff *skb = pkt->skb; + int ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info); + + ret = target->target(skb, &pkt->xt); + + if (pkt->xt.hotdrop) + ret = NF_DROP; + + switch(ret) { + case XT_CONTINUE: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + default: + data[NFT_REG_VERDICT].verdict = ret; + break; + } + return; +} + +static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { + [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, + [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, +}; + +static void +nft_target_set_tgchk_param(struct xt_tgchk_param *par, + const struct nft_ctx *ctx, + struct xt_target *target, void *info, + union nft_entry *entry, u8 proto, bool inv) +{ + par->net = &init_net; + par->table = ctx->table->name; + switch (ctx->afi->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; + break; + case AF_INET6: + entry->e6.ipv6.proto = proto; + entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; + break; + } + par->entryinfo = entry; + par->target = target; + par->targinfo = info; + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + par->hook_mask = 1 << ops->hooknum; + } + par->family = ctx->afi->family; +} + +static void target_compat_from_user(struct xt_target *t, void *in, void *out) +{ +#ifdef CONFIG_COMPAT + if (t->compat_from_user) { + int pad; + + t->compat_from_user(out, in); + pad = XT_ALIGN(t->targetsize) - t->targetsize; + if (pad > 0) + memset(out + t->targetsize, 0, pad); + } else +#endif + memcpy(out, in, XT_ALIGN(t->targetsize)); +} + +static inline int nft_compat_target_offset(struct xt_target *target) +{ +#ifdef CONFIG_COMPAT + return xt_compat_target_offset(target); +#else + return 0; +#endif +} + +static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = { + [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 }, + [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, +}; + +static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) +{ + struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 flags; + int err; + + err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr, + nft_rule_compat_policy); + if (err < 0) + return err; + + if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS]) + return -EINVAL; + + flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); + if (flags & ~NFT_RULE_COMPAT_F_MASK) + return -EINVAL; + if (flags & NFT_RULE_COMPAT_F_INV) + *inv = true; + + return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); +} + +static int +nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct xt_tgchk_param par; + size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); + u8 proto = 0; + bool inv = false; + union nft_entry e = {}; + int ret; + + target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); + + if (ctx->nla[NFTA_RULE_COMPAT]) + proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + + ret = xt_check_target(&par, size, proto, inv); + if (ret < 0) + goto err; + + /* The standard target cannot be used */ + if (target->target == NULL) { + ret = -EINVAL; + goto err; + } + + return 0; +err: + module_put(target->me); + return ret; +} + +static void +nft_target_destroy(const struct nft_expr *expr) +{ + struct xt_target *target = expr->ops->data; + + module_put(target->me); +} + +static int +target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in) +{ + int ret; + +#ifdef CONFIG_COMPAT + if (t->compat_to_user) { + mm_segment_t old_fs; + void *out; + + out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC); + if (out == NULL) + return -ENOMEM; + + /* We want to reuse existing compat_to_user */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + t->compat_to_user(out, in); + set_fs(old_fs); + ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out); + kfree(out); + } else +#endif + ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in); + + return ret; +} + +static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || + nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || + target_dump_info(skb, target, info)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_target_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct xt_target *target = expr->ops->data; + unsigned int hook_mask = 0; + + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + hook_mask = 1 << ops->hooknum; + if (hook_mask & target->hooks) + return 0; + + /* This target is being called from an invalid chain */ + return -EINVAL; + } + return 0; +} + +static void nft_match_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + struct sk_buff *skb = pkt->skb; + bool ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info); + + ret = match->match(skb, (struct xt_action_param *)&pkt->xt); + + if (pkt->xt.hotdrop) { + data[NFT_REG_VERDICT].verdict = NF_DROP; + return; + } + + switch(ret) { + case true: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + case false: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; + break; + } +} + +static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { + [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, + [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, +}; + +/* struct xt_mtchk_param and xt_tgchk_param look very similar */ +static void +nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, + struct xt_match *match, void *info, + union nft_entry *entry, u8 proto, bool inv) +{ + par->net = &init_net; + par->table = ctx->table->name; + switch (ctx->afi->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; + break; + case AF_INET6: + entry->e6.ipv6.proto = proto; + entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; + break; + } + par->entryinfo = entry; + par->match = match; + par->matchinfo = info; + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + par->hook_mask = 1 << ops->hooknum; + } + par->family = ctx->afi->family; +} + +static void match_compat_from_user(struct xt_match *m, void *in, void *out) +{ +#ifdef CONFIG_COMPAT + if (m->compat_from_user) { + int pad; + + m->compat_from_user(out, in); + pad = XT_ALIGN(m->matchsize) - m->matchsize; + if (pad > 0) + memset(out + m->matchsize, 0, pad); + } else +#endif + memcpy(out, in, XT_ALIGN(m->matchsize)); +} + +static int +nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + struct xt_mtchk_param par; + size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); + u8 proto = 0; + bool inv = false; + union nft_entry e = {}; + int ret; + + match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); + + if (ctx->nla[NFTA_RULE_COMPAT]) + proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + + ret = xt_check_match(&par, size, proto, inv); + if (ret < 0) + goto err; + + return 0; +err: + module_put(match->me); + return ret; +} + +static void +nft_match_destroy(const struct nft_expr *expr) +{ + struct xt_match *match = expr->ops->data; + + module_put(match->me); +} + +static int +match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in) +{ + int ret; + +#ifdef CONFIG_COMPAT + if (m->compat_to_user) { + mm_segment_t old_fs; + void *out; + + out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC); + if (out == NULL) + return -ENOMEM; + + /* We want to reuse existing compat_to_user */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + m->compat_to_user(out, in); + set_fs(old_fs); + ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out); + kfree(out); + } else +#endif + ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in); + + return ret; +} + +static inline int nft_compat_match_offset(struct xt_match *match) +{ +#ifdef CONFIG_COMPAT + return xt_compat_match_offset(match); +#else + return 0; +#endif +} + +static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + + if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || + nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || + match_dump_info(skb, match, info)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_match_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct xt_match *match = expr->ops->data; + unsigned int hook_mask = 0; + + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + hook_mask = 1 << ops->hooknum; + if (hook_mask & match->hooks) + return 0; + + /* This match is being called from an invalid chain */ + return -EINVAL; + } + return 0; +} + +static int +nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, + int event, u16 family, const char *name, + int rev, int target) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = portid ? NLM_F_MULTI : 0; + + event |= NFNL_SUBSYS_NFT_COMPAT << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_COMPAT_NAME, name) || + nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) || + nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = 0, target; + struct nfgenmsg *nfmsg; + const char *fmt; + const char *name; + u32 rev; + struct sk_buff *skb2; + + if (tb[NFTA_COMPAT_NAME] == NULL || + tb[NFTA_COMPAT_REV] == NULL || + tb[NFTA_COMPAT_TYPE] == NULL) + return -EINVAL; + + name = nla_data(tb[NFTA_COMPAT_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV])); + target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE])); + + nfmsg = nlmsg_data(nlh); + + switch(nfmsg->nfgen_family) { + case AF_INET: + fmt = "ipt_%s"; + break; + case AF_INET6: + fmt = "ip6t_%s"; + break; + default: + pr_err("nft_compat: unsupported protocol %d\n", + nfmsg->nfgen_family); + return -EINVAL; + } + + try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name, + rev, target, &ret), + fmt, name); + + if (ret < 0) + return ret; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + /* include the best revision for this extension in the message */ + if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_COMPAT_GET, + nfmsg->nfgen_family, + name, ret, target) <= 0) { + kfree_skb(skb2); + return -ENOSPC; + } + + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + return ret == -EAGAIN ? -ENOBUFS : ret; +} + +static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { + [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, + .len = NFT_COMPAT_NAME_MAX-1 }, + [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, +}; + +static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = { + [NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get, + .attr_count = NFTA_COMPAT_MAX, + .policy = nfnl_compat_policy_get }, +}; + +static const struct nfnetlink_subsystem nfnl_compat_subsys = { + .name = "nft-compat", + .subsys_id = NFNL_SUBSYS_NFT_COMPAT, + .cb_count = NFNL_MSG_COMPAT_MAX, + .cb = nfnl_nft_compat_cb, +}; + +static LIST_HEAD(nft_match_list); + +struct nft_xt { + struct list_head head; + struct nft_expr_ops ops; +}; + +static struct nft_expr_type nft_match_type; + +static const struct nft_expr_ops * +nft_match_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + struct nft_xt *nft_match; + struct xt_match *match; + char *mt_name; + __u32 rev, family; + + if (tb[NFTA_MATCH_NAME] == NULL || + tb[NFTA_MATCH_REV] == NULL || + tb[NFTA_MATCH_INFO] == NULL) + return ERR_PTR(-EINVAL); + + mt_name = nla_data(tb[NFTA_MATCH_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); + family = ctx->afi->family; + + /* Re-use the existing match if it's already loaded. */ + list_for_each_entry(nft_match, &nft_match_list, head) { + struct xt_match *match = nft_match->ops.data; + + if (strcmp(match->name, mt_name) == 0 && + match->revision == rev && match->family == family) + return &nft_match->ops; + } + + match = xt_request_find_match(family, mt_name, rev); + if (IS_ERR(match)) + return ERR_PTR(-ENOENT); + + /* This is the first time we use this match, allocate operations */ + nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); + if (nft_match == NULL) + return ERR_PTR(-ENOMEM); + + nft_match->ops.type = &nft_match_type; + nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) + + nft_compat_match_offset(match)); + nft_match->ops.eval = nft_match_eval; + nft_match->ops.init = nft_match_init; + nft_match->ops.destroy = nft_match_destroy; + nft_match->ops.dump = nft_match_dump; + nft_match->ops.validate = nft_match_validate; + nft_match->ops.data = match; + + list_add(&nft_match->head, &nft_match_list); + + return &nft_match->ops; +} + +static void nft_match_release(void) +{ + struct nft_xt *nft_match; + + list_for_each_entry(nft_match, &nft_match_list, head) + kfree(nft_match); +} + +static struct nft_expr_type nft_match_type __read_mostly = { + .name = "match", + .select_ops = nft_match_select_ops, + .policy = nft_match_policy, + .maxattr = NFTA_MATCH_MAX, + .owner = THIS_MODULE, +}; + +static LIST_HEAD(nft_target_list); + +static struct nft_expr_type nft_target_type; + +static const struct nft_expr_ops * +nft_target_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + struct nft_xt *nft_target; + struct xt_target *target; + char *tg_name; + __u32 rev, family; + + if (tb[NFTA_TARGET_NAME] == NULL || + tb[NFTA_TARGET_REV] == NULL || + tb[NFTA_TARGET_INFO] == NULL) + return ERR_PTR(-EINVAL); + + tg_name = nla_data(tb[NFTA_TARGET_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); + family = ctx->afi->family; + + /* Re-use the existing target if it's already loaded. */ + list_for_each_entry(nft_target, &nft_match_list, head) { + struct xt_target *target = nft_target->ops.data; + + if (strcmp(target->name, tg_name) == 0 && + target->revision == rev && target->family == family) + return &nft_target->ops; + } + + target = xt_request_find_target(family, tg_name, rev); + if (IS_ERR(target)) + return ERR_PTR(-ENOENT); + + /* This is the first time we use this target, allocate operations */ + nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); + if (nft_target == NULL) + return ERR_PTR(-ENOMEM); + + nft_target->ops.type = &nft_target_type; + nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) + + nft_compat_target_offset(target)); + nft_target->ops.eval = nft_target_eval; + nft_target->ops.init = nft_target_init; + nft_target->ops.destroy = nft_target_destroy; + nft_target->ops.dump = nft_target_dump; + nft_target->ops.validate = nft_target_validate; + nft_target->ops.data = target; + + list_add(&nft_target->head, &nft_target_list); + + return &nft_target->ops; +} + +static void nft_target_release(void) +{ + struct nft_xt *nft_target; + + list_for_each_entry(nft_target, &nft_target_list, head) + kfree(nft_target); +} + +static struct nft_expr_type nft_target_type __read_mostly = { + .name = "target", + .select_ops = nft_target_select_ops, + .policy = nft_target_policy, + .maxattr = NFTA_TARGET_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_compat_module_init(void) +{ + int ret; + + ret = nft_register_expr(&nft_match_type); + if (ret < 0) + return ret; + + ret = nft_register_expr(&nft_target_type); + if (ret < 0) + goto err_match; + + ret = nfnetlink_subsys_register(&nfnl_compat_subsys); + if (ret < 0) { + pr_err("nft_compat: cannot register with nfnetlink.\n"); + goto err_target; + } + + pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n"); + + return ret; + +err_target: + nft_unregister_expr(&nft_target_type); +err_match: + nft_unregister_expr(&nft_match_type); + return ret; +} + +static void __exit nft_compat_module_exit(void) +{ + nfnetlink_subsys_unregister(&nfnl_compat_subsys); + nft_unregister_expr(&nft_target_type); + nft_unregister_expr(&nft_match_type); + nft_match_release(); + nft_target_release(); +} + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); + +module_init(nft_compat_module_init); +module_exit(nft_compat_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_EXPR("match"); +MODULE_ALIAS_NFT_EXPR("target"); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c new file mode 100644 index 000000000000..c89ee486ce54 --- /dev/null +++ b/net/netfilter/nft_counter.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/seqlock.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +struct nft_counter { + seqlock_t lock; + u64 bytes; + u64 packets; +}; + +static void nft_counter_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + write_seqlock_bh(&priv->lock); + priv->bytes += pkt->skb->len; + priv->packets++; + write_sequnlock_bh(&priv->lock); +} + +static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_counter *priv = nft_expr_priv(expr); + unsigned int seq; + u64 bytes; + u64 packets; + + do { + seq = read_seqbegin(&priv->lock); + bytes = priv->bytes; + packets = priv->packets; + } while (read_seqretry(&priv->lock, seq)); + + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int nft_counter_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + if (tb[NFTA_COUNTER_PACKETS]) + priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + if (tb[NFTA_COUNTER_BYTES]) + priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + + seqlock_init(&priv->lock); + return 0; +} + +static struct nft_expr_type nft_counter_type; +static const struct nft_expr_ops nft_counter_ops = { + .type = &nft_counter_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), + .eval = nft_counter_eval, + .init = nft_counter_init, + .dump = nft_counter_dump, +}; + +static struct nft_expr_type nft_counter_type __read_mostly = { + .name = "counter", + .ops = &nft_counter_ops, + .policy = nft_counter_policy, + .maxattr = NFTA_COUNTER_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_counter_module_init(void) +{ + return nft_register_expr(&nft_counter_type); +} + +static void __exit nft_counter_module_exit(void) +{ + nft_unregister_expr(&nft_counter_type); +} + +module_init(nft_counter_module_init); +module_exit(nft_counter_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("counter"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c new file mode 100644 index 000000000000..955f4e6e7089 --- /dev/null +++ b/net/netfilter/nft_ct.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_helper.h> + +struct nft_ct { + enum nft_ct_keys key:8; + enum ip_conntrack_dir dir:8; + enum nft_registers dreg:8; + uint8_t family; +}; + +static void nft_ct_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + const struct nf_conn_help *help; + const struct nf_conntrack_tuple *tuple; + const struct nf_conntrack_helper *helper; + long diff; + unsigned int state; + + ct = nf_ct_get(pkt->skb, &ctinfo); + + switch (priv->key) { + case NFT_CT_STATE: + if (ct == NULL) + state = NF_CT_STATE_INVALID_BIT; + else if (nf_ct_is_untracked(ct)) + state = NF_CT_STATE_UNTRACKED_BIT; + else + state = NF_CT_STATE_BIT(ctinfo); + dest->data[0] = state; + return; + } + + if (ct == NULL) + goto err; + + switch (priv->key) { + case NFT_CT_DIRECTION: + dest->data[0] = CTINFO2DIR(ctinfo); + return; + case NFT_CT_STATUS: + dest->data[0] = ct->status; + return; +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: + dest->data[0] = ct->mark; + return; +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: + dest->data[0] = ct->secmark; + return; +#endif + case NFT_CT_EXPIRATION: + diff = (long)jiffies - (long)ct->timeout.expires; + if (diff < 0) + diff = 0; + dest->data[0] = jiffies_to_msecs(diff); + return; + case NFT_CT_HELPER: + if (ct->master == NULL) + goto err; + help = nfct_help(ct->master); + if (help == NULL) + goto err; + helper = rcu_dereference(help->helper); + if (helper == NULL) + goto err; + if (strlen(helper->name) >= sizeof(dest->data)) + goto err; + strncpy((char *)dest->data, helper->name, sizeof(dest->data)); + return; + } + + tuple = &ct->tuplehash[priv->dir].tuple; + switch (priv->key) { + case NFT_CT_L3PROTOCOL: + dest->data[0] = nf_ct_l3num(ct); + return; + case NFT_CT_SRC: + memcpy(dest->data, tuple->src.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_DST: + memcpy(dest->data, tuple->dst.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_PROTOCOL: + dest->data[0] = nf_ct_protonum(ct); + return; + case NFT_CT_PROTO_SRC: + dest->data[0] = (__force __u16)tuple->src.u.all; + return; + case NFT_CT_PROTO_DST: + dest->data[0] = (__force __u16)tuple->dst.u.all; + return; + } + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { + [NFTA_CT_DREG] = { .type = NLA_U32 }, + [NFTA_CT_KEY] = { .type = NLA_U32 }, + [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, +}; + +static int nft_ct_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_CT_DREG] == NULL || + tb[NFTA_CT_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + if (tb[NFTA_CT_DIRECTION] != NULL) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } + } + + switch (priv->key) { + case NFT_CT_STATE: + case NFT_CT_DIRECTION: + case NFT_CT_STATUS: +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: +#endif + case NFT_CT_EXPIRATION: + case NFT_CT_HELPER: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + break; + case NFT_CT_PROTOCOL: + case NFT_CT_SRC: + case NFT_CT_DST: + case NFT_CT_PROTO_SRC: + case NFT_CT_PROTO_DST: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + break; + default: + return -EOPNOTSUPP; + } + + err = nf_ct_l3proto_try_module_get(ctx->afi->family); + if (err < 0) + return err; + priv->family = ctx->afi->family; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + goto err1; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + goto err1; + return 0; + +err1: + nf_ct_l3proto_module_put(ctx->afi->family); + return err; +} + +static void nft_ct_destroy(const struct nft_expr *expr) +{ + struct nft_ct *priv = nft_expr_priv(expr); + + nf_ct_l3proto_module_put(priv->family); +} + +static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_ct_type; +static const struct nft_expr_ops nft_ct_ops = { + .type = &nft_ct_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .eval = nft_ct_eval, + .init = nft_ct_init, + .destroy = nft_ct_destroy, + .dump = nft_ct_dump, +}; + +static struct nft_expr_type nft_ct_type __read_mostly = { + .name = "ct", + .ops = &nft_ct_ops, + .policy = nft_ct_policy, + .maxattr = NFTA_CT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_ct_module_init(void) +{ + return nft_register_expr(&nft_ct_type); +} + +static void __exit nft_ct_module_exit(void) +{ + nft_unregister_expr(&nft_ct_type); +} + +module_init(nft_ct_module_init); +module_exit(nft_ct_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("ct"); diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c new file mode 100644 index 000000000000..b6eed4d5a096 --- /dev/null +++ b/net/netfilter/nft_expr_template.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +struct nft_template { + +}; + +static void nft_template_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { + [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 }, +}; + +static int nft_template_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_template *priv = nft_expr_priv(expr); + + return 0; +} + +static void nft_template_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_template *priv = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_template_type; +static const struct nft_expr_ops nft_template_ops = { + .type = &nft_template_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), + .eval = nft_template_eval, + .init = nft_template_init, + .destroy = nft_template_destroy, + .dump = nft_template_dump, +}; + +static struct nft_expr_type nft_template_type __read_mostly = { + .name = "template", + .ops = &nft_template_ops, + .policy = nft_template_policy, + .maxattr = NFTA_TEMPLATE_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_template_module_init(void) +{ + return nft_register_expr(&nft_template_type); +} + +static void __exit nft_template_module_exit(void) +{ + nft_unregister_expr(&nft_template_type); +} + +module_init(nft_template_module_init); +module_exit(nft_template_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("template"); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c new file mode 100644 index 000000000000..8e0bb75e7c51 --- /dev/null +++ b/net/netfilter/nft_exthdr.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +// FIXME: +#include <net/ipv6.h> + +struct nft_exthdr { + u8 type; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_exthdr_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + unsigned int offset; + int err; + + err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); + if (err < 0) + goto err; + offset += priv->offset; + + if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { + [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, + [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, + [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, +}; + +static int nft_exthdr_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_EXTHDR_DREG] == NULL || + tb[NFTA_EXTHDR_TYPE] == NULL || + tb[NFTA_EXTHDR_OFFSET] == NULL || + tb[NFTA_EXTHDR_LEN] == NULL) + return -EINVAL; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_exthdr_type; +static const struct nft_expr_ops nft_exthdr_ops = { + .type = &nft_exthdr_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .eval = nft_exthdr_eval, + .init = nft_exthdr_init, + .dump = nft_exthdr_dump, +}; + +static struct nft_expr_type nft_exthdr_type __read_mostly = { + .name = "exthdr", + .ops = &nft_exthdr_ops, + .policy = nft_exthdr_policy, + .maxattr = NFTA_EXTHDR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_exthdr_module_init(void) +{ + return nft_register_expr(&nft_exthdr_type); +} + +static void __exit nft_exthdr_module_exit(void) +{ + nft_unregister_expr(&nft_exthdr_type); +} + +module_init(nft_exthdr_module_init); +module_exit(nft_exthdr_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("exthdr"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c new file mode 100644 index 000000000000..3d3f8fce10a5 --- /dev/null +++ b/net/netfilter/nft_hash.c @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +struct nft_hash { + struct hlist_head *hash; + unsigned int hsize; +}; + +struct nft_hash_elem { + struct hlist_node hnode; + struct nft_data key; + struct nft_data data[]; +}; + +static u32 nft_hash_rnd __read_mostly; +static bool nft_hash_rnd_initted __read_mostly; + +static unsigned int nft_hash_data(const struct nft_data *data, + unsigned int hsize, unsigned int len) +{ + unsigned int h; + + h = jhash(data->data, len, nft_hash_rnd); + return ((u64)h * hsize) >> 32; +} + +static bool nft_hash_lookup(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data) +{ + const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + unsigned int h; + + h = nft_hash_data(key, priv->hsize, set->klen); + hlist_for_each_entry(he, &priv->hash[h], hnode) { + if (nft_data_cmp(&he->key, key, set->klen)) + continue; + if (set->flags & NFT_SET_MAP) + nft_data_copy(data, he->data); + return true; + } + return false; +} + +static void nft_hash_elem_destroy(const struct nft_set *set, + struct nft_hash_elem *he) +{ + nft_data_uninit(&he->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(he->data, set->dtype); + kfree(he); +} + +static int nft_hash_insert(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + unsigned int size, h; + + if (elem->flags != 0) + return -EINVAL; + + size = sizeof(*he); + if (set->flags & NFT_SET_MAP) + size += sizeof(he->data[0]); + + he = kzalloc(size, GFP_KERNEL); + if (he == NULL) + return -ENOMEM; + + nft_data_copy(&he->key, &elem->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(he->data, &elem->data); + + h = nft_hash_data(&he->key, priv->hsize, set->klen); + hlist_add_head_rcu(&he->hnode, &priv->hash[h]); + return 0; +} + +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash_elem *he = elem->cookie; + + hlist_del_rcu(&he->hnode); + kfree(he); +} + +static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) +{ + const struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + unsigned int h; + + h = nft_hash_data(&elem->key, priv->hsize, set->klen); + hlist_for_each_entry(he, &priv->hash[h], hnode) { + if (nft_data_cmp(&he->key, &elem->key, set->klen)) + continue; + + elem->cookie = he; + elem->flags = 0; + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem->data, he->data); + return 0; + } + return -ENOENT; +} + +static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, + struct nft_set_iter *iter) +{ + const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + struct nft_set_elem elem; + unsigned int i; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry(he, &priv->hash[i], hnode) { + if (iter->count < iter->skip) + goto cont; + + memcpy(&elem.key, &he->key, sizeof(elem.key)); + if (set->flags & NFT_SET_MAP) + memcpy(&elem.data, he->data, sizeof(elem.data)); + elem.flags = 0; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; + } + } +} + +static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_hash); +} + +static int nft_hash_init(const struct nft_set *set, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_set_priv(set); + unsigned int cnt, i; + + if (unlikely(!nft_hash_rnd_initted)) { + get_random_bytes(&nft_hash_rnd, 4); + nft_hash_rnd_initted = true; + } + + /* Aim for a load factor of 0.75 */ + // FIXME: temporarily broken until we have set descriptions + cnt = 100; + cnt = cnt * 4 / 3; + + priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); + if (priv->hash == NULL) + return -ENOMEM; + priv->hsize = cnt; + + for (i = 0; i < cnt; i++) + INIT_HLIST_HEAD(&priv->hash[i]); + + return 0; +} + +static void nft_hash_destroy(const struct nft_set *set) +{ + const struct nft_hash *priv = nft_set_priv(set); + const struct hlist_node *next; + struct nft_hash_elem *elem; + unsigned int i; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { + hlist_del(&elem->hnode); + nft_hash_elem_destroy(set, elem); + } + } + kfree(priv->hash); +} + +static struct nft_set_ops nft_hash_ops __read_mostly = { + .privsize = nft_hash_privsize, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .get = nft_hash_get, + .insert = nft_hash_insert, + .remove = nft_hash_remove, + .lookup = nft_hash_lookup, + .walk = nft_hash_walk, + .features = NFT_SET_MAP, + .owner = THIS_MODULE, +}; + +static int __init nft_hash_module_init(void) +{ + return nft_register_set(&nft_hash_ops); +} + +static void __exit nft_hash_module_exit(void) +{ + nft_unregister_set(&nft_hash_ops); +} + +module_init(nft_hash_module_init); +module_exit(nft_hash_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c new file mode 100644 index 000000000000..f169501f1ad4 --- /dev/null +++ b/net/netfilter/nft_immediate.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +struct nft_immediate_expr { + struct nft_data data; + enum nft_registers dreg:8; + u8 dlen; +}; + +static void nft_immediate_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + nft_data_copy(&data[priv->dreg], &priv->data); +} + +static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { + [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 }, + [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_immediate_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_immediate_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_IMMEDIATE_DREG] == NULL || + tb[NFTA_IMMEDIATE_DATA] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]); + if (err < 0) + return err; + priv->dlen = desc.len; + + err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type); + if (err < 0) + goto err1; + + return 0; + +err1: + nft_data_uninit(&priv->data, desc.type); + return err; +} + +static void nft_immediate_destroy(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + + return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data, + nft_dreg_to_type(priv->dreg), priv->dlen); + +nla_put_failure: + return -1; +} + +static int nft_immediate_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + *data = &priv->data; + + return 0; +} + +static struct nft_expr_type nft_imm_type; +static const struct nft_expr_ops nft_imm_ops = { + .type = &nft_imm_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), + .eval = nft_immediate_eval, + .init = nft_immediate_init, + .destroy = nft_immediate_destroy, + .dump = nft_immediate_dump, + .validate = nft_immediate_validate, +}; + +static struct nft_expr_type nft_imm_type __read_mostly = { + .name = "immediate", + .ops = &nft_imm_ops, + .policy = nft_immediate_policy, + .maxattr = NFTA_IMMEDIATE_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_immediate_module_init(void) +{ + return nft_register_expr(&nft_imm_type); +} + +void nft_immediate_module_exit(void) +{ + nft_unregister_expr(&nft_imm_type); +} diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c new file mode 100644 index 000000000000..85da5bd02f64 --- /dev/null +++ b/net/netfilter/nft_limit.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +static DEFINE_SPINLOCK(limit_lock); + +struct nft_limit { + u64 tokens; + u64 rate; + u64 unit; + unsigned long stamp; +}; + +static void nft_limit_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + spin_lock_bh(&limit_lock); + if (time_after_eq(jiffies, priv->stamp)) { + priv->tokens = priv->rate; + priv->stamp = jiffies + priv->unit * HZ; + } + + if (priv->tokens >= 1) { + priv->tokens--; + spin_unlock_bh(&limit_lock); + return; + } + spin_unlock_bh(&limit_lock); + + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { + [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, + [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, +}; + +static int nft_limit_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + if (tb[NFTA_LIMIT_RATE] == NULL || + tb[NFTA_LIMIT_UNIT] == NULL) + return -EINVAL; + + priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); + priv->stamp = jiffies + priv->unit * HZ; + priv->tokens = priv->rate; + return 0; +} + +static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_limit *priv = nft_expr_priv(expr); + + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_limit_type; +static const struct nft_expr_ops nft_limit_ops = { + .type = &nft_limit_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), + .eval = nft_limit_eval, + .init = nft_limit_init, + .dump = nft_limit_dump, +}; + +static struct nft_expr_type nft_limit_type __read_mostly = { + .name = "limit", + .ops = &nft_limit_ops, + .policy = nft_limit_policy, + .maxattr = NFTA_LIMIT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_limit_module_init(void) +{ + return nft_register_expr(&nft_limit_type); +} + +static void __exit nft_limit_module_exit(void) +{ + nft_unregister_expr(&nft_limit_type); +} + +module_init(nft_limit_module_init); +module_exit(nft_limit_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("limit"); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c new file mode 100644 index 000000000000..57cad072a13e --- /dev/null +++ b/net/netfilter/nft_log.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_log.h> +#include <linux/netdevice.h> + +static const char *nft_log_null_prefix = ""; + +struct nft_log { + struct nf_loginfo loginfo; + char *prefix; + int family; +}; + +static void nft_log_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_log *priv = nft_expr_priv(expr); + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &priv->loginfo, "%s", priv->prefix); +} + +static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { + [NFTA_LOG_GROUP] = { .type = NLA_U16 }, + [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, + [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, + [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, +}; + +static int nft_log_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_log *priv = nft_expr_priv(expr); + struct nf_loginfo *li = &priv->loginfo; + const struct nlattr *nla; + + priv->family = ctx->afi->family; + + nla = tb[NFTA_LOG_PREFIX]; + if (nla != NULL) { + priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); + if (priv->prefix == NULL) + return -ENOMEM; + nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); + } else + priv->prefix = (char *)nft_log_null_prefix; + + li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_GROUP] != NULL) + li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); + + if (tb[NFTA_LOG_SNAPLEN] != NULL) + li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); + if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { + li->u.ulog.qthreshold = + ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + } + + return 0; +} + +static void nft_log_destroy(const struct nft_expr *expr) +{ + struct nft_log *priv = nft_expr_priv(expr); + + if (priv->prefix != nft_log_null_prefix) + kfree(priv->prefix); +} + +static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_log *priv = nft_expr_priv(expr); + const struct nf_loginfo *li = &priv->loginfo; + + if (priv->prefix != nft_log_null_prefix) + if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) + goto nla_put_failure; + if (li->u.ulog.group) + if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) + goto nla_put_failure; + if (li->u.ulog.copy_len) + if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, + htonl(li->u.ulog.copy_len))) + goto nla_put_failure; + if (li->u.ulog.qthreshold) + if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, + htons(li->u.ulog.qthreshold))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_log_type; +static const struct nft_expr_ops nft_log_ops = { + .type = &nft_log_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), + .eval = nft_log_eval, + .init = nft_log_init, + .destroy = nft_log_destroy, + .dump = nft_log_dump, +}; + +static struct nft_expr_type nft_log_type __read_mostly = { + .name = "log", + .ops = &nft_log_ops, + .policy = nft_log_policy, + .maxattr = NFTA_LOG_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_log_module_init(void) +{ + return nft_register_expr(&nft_log_type); +} + +static void __exit nft_log_module_exit(void) +{ + nft_unregister_expr(&nft_log_type); +} + +module_init(nft_log_module_init); +module_exit(nft_log_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("log"); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c new file mode 100644 index 000000000000..8a6116b75b5a --- /dev/null +++ b/net/netfilter/nft_lookup.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +struct nft_lookup { + struct nft_set *set; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + struct nft_set_binding binding; +}; + +static void nft_lookup_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + const struct nft_set *set = priv->set; + + if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + return; + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { + [NFTA_LOOKUP_SET] = { .type = NLA_STRING }, + [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, +}; + +static int nft_lookup_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + struct nft_set *set; + int err; + + if (tb[NFTA_LOOKUP_SET] == NULL || + tb[NFTA_LOOKUP_SREG] == NULL) + return -EINVAL; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_LOOKUP_DREG] != NULL) { + if (!(set->flags & NFT_SET_MAP)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + if (priv->dreg == NFT_REG_VERDICT) { + if (set->dtype != NFT_DATA_VERDICT) + return -EINVAL; + } else if (set->dtype == NFT_DATA_VERDICT) + return -EINVAL; + } else if (set->flags & NFT_SET_MAP) + return -EINVAL; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + return err; + + priv->set = set; + return 0; +} + +static void nft_lookup_destroy(const struct nft_expr *expr) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(NULL, priv->set, &priv->binding); +} + +static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (priv->set->flags & NFT_SET_MAP) + if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_lookup_type; +static const struct nft_expr_ops nft_lookup_ops = { + .type = &nft_lookup_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), + .eval = nft_lookup_eval, + .init = nft_lookup_init, + .destroy = nft_lookup_destroy, + .dump = nft_lookup_dump, +}; + +static struct nft_expr_type nft_lookup_type __read_mostly = { + .name = "lookup", + .ops = &nft_lookup_ops, + .policy = nft_lookup_policy, + .maxattr = NFTA_LOOKUP_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_lookup_module_init(void) +{ + return nft_register_expr(&nft_lookup_type); +} + +void nft_lookup_module_exit(void) +{ + nft_unregister_expr(&nft_lookup_type); +} diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c new file mode 100644 index 000000000000..8c28220a90b3 --- /dev/null +++ b/net/netfilter/nft_meta.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/dst.h> +#include <net/sock.h> +#include <net/tcp_states.h> /* for TCP_TIME_WAIT */ +#include <net/netfilter/nf_tables.h> + +struct nft_meta { + enum nft_meta_keys key:8; + enum nft_registers dreg:8; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + + switch (priv->key) { + case NFT_META_LEN: + dest->data[0] = skb->len; + break; + case NFT_META_PROTOCOL: + *(__be16 *)dest->data = skb->protocol; + break; + case NFT_META_PRIORITY: + dest->data[0] = skb->priority; + break; + case NFT_META_MARK: + dest->data[0] = skb->mark; + break; + case NFT_META_IIF: + if (in == NULL) + goto err; + dest->data[0] = in->ifindex; + break; + case NFT_META_OIF: + if (out == NULL) + goto err; + dest->data[0] = out->ifindex; + break; + case NFT_META_IIFNAME: + if (in == NULL) + goto err; + strncpy((char *)dest->data, in->name, sizeof(dest->data)); + break; + case NFT_META_OIFNAME: + if (out == NULL) + goto err; + strncpy((char *)dest->data, out->name, sizeof(dest->data)); + break; + case NFT_META_IIFTYPE: + if (in == NULL) + goto err; + *(u16 *)dest->data = in->type; + break; + case NFT_META_OIFTYPE: + if (out == NULL) + goto err; + *(u16 *)dest->data = out->type; + break; + case NFT_META_SKUID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + + dest->data[0] = + from_kuid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsuid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; + case NFT_META_SKGID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + dest->data[0] = + from_kgid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: { + const struct dst_entry *dst = skb_dst(skb); + + if (dst == NULL) + goto err; + dest->data[0] = dst->tclassid; + break; + } +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + dest->data[0] = skb->secmark; + break; +#endif + default: + WARN_ON(1); + goto err; + } + return; + +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_DREG] = { .type = NLA_U32 }, + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_META_DREG] == NULL || + tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_LEN: + case NFT_META_PROTOCOL: + case NFT_META_PRIORITY: + case NFT_META_MARK: + case NFT_META_IIF: + case NFT_META_OIF: + case NFT_META_IIFNAME: + case NFT_META_OIFNAME: + case NFT_META_IIFTYPE: + case NFT_META_OIFTYPE: + case NFT_META_SKUID: + case NFT_META_SKGID: +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_meta_type; +static const struct nft_expr_ops nft_meta_ops = { + .type = &nft_meta_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, +}; + +static struct nft_expr_type nft_meta_type __read_mostly = { + .name = "meta", + .ops = &nft_meta_ops, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_meta_module_init(void) +{ + return nft_register_expr(&nft_meta_type); +} + +static void __exit nft_meta_module_exit(void) +{ + nft_unregister_expr(&nft_meta_type); +} + +module_init(nft_meta_module_init); +module_exit(nft_meta_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c new file mode 100644 index 000000000000..71177df75ffb --- /dev/null +++ b/net/netfilter/nft_meta_target.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +struct nft_meta { + enum nft_meta_keys key; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data *nfres, + struct nft_data *data, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 val = data->data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = val; + break; + case NFT_META_PRIORITY: + skb->priority = val; + break; + case NFT_META_NFTRACE: + skb->nf_trace = val; + break; +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + skb->secmark = val; + break; +#endif + default: + WARN_ON(1); + } +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + if (tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (meta->key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops meta_target __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_target_init(void) +{ + return nft_register_expr(&meta_target); +} + +static void __exit nft_meta_target_exit(void) +{ + nft_unregister_expr(&meta_target); +} + +module_init(nft_meta_target_init); +module_exit(nft_meta_target_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c new file mode 100644 index 000000000000..b0b87b2d2411 --- /dev/null +++ b/net/netfilter/nft_nat.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/string.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/ip.h> + +struct nft_nat { + enum nft_registers sreg_addr_min:8; + enum nft_registers sreg_addr_max:8; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + int family; + enum nf_nat_manip_type type; +}; + +static void nft_nat_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_addr_min) { + if (priv->family == AF_INET) { + range.min_addr.ip = data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + + } else { + memcpy(range.min_addr.ip6, + data[priv->sreg_addr_min].data, + sizeof(struct nft_data)); + memcpy(range.max_addr.ip6, + data[priv->sreg_addr_max].data, + sizeof(struct nft_data)); + } + range.flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (priv->sreg_proto_min) { + range.min_proto.all = data[priv->sreg_proto_min].data[0]; + range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + data[NFT_REG_VERDICT].verdict = + nf_nat_setup_info(ct, &range, priv->type); +} + +static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { + [NFTA_NAT_TYPE] = { .type = NLA_U32 }, + [NFTA_NAT_FAMILY] = { .type = NLA_U32 }, + [NFTA_NAT_REG_ADDR_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 }, +}; + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_NAT_TYPE] == NULL) + return -EINVAL; + + switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { + case NFT_NAT_SNAT: + priv->type = NF_NAT_MANIP_SRC; + break; + case NFT_NAT_DNAT: + priv->type = NF_NAT_MANIP_DST; + break; + default: + return -EINVAL; + } + + if (tb[NFTA_NAT_FAMILY] == NULL) + return -EINVAL; + + priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); + if (priv->family != AF_INET && priv->family != AF_INET6) + return -EINVAL; + + if (tb[NFTA_NAT_REG_ADDR_MIN]) { + priv->sreg_addr_min = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_REG_ADDR_MAX]) { + priv->sreg_addr_max = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_ADDR_MAX])); + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else + priv->sreg_addr_max = priv->sreg_addr_min; + + if (tb[NFTA_NAT_REG_PROTO_MIN]) { + priv->sreg_proto_min = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_REG_PROTO_MAX]) { + priv->sreg_proto_max = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else + priv->sreg_proto_max = priv->sreg_proto_min; + + return 0; +} + +static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NF_NAT_MANIP_SRC: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) + goto nla_put_failure; + break; + case NF_NAT_MANIP_DST: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) + goto nla_put_failure; + break; + } + + if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_nat_type; +static const struct nft_expr_ops nft_nat_ops = { + .type = &nft_nat_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .eval = nft_nat_eval, + .init = nft_nat_init, + .dump = nft_nat_dump, +}; + +static struct nft_expr_type nft_nat_type __read_mostly = { + .name = "nat", + .ops = &nft_nat_ops, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_nat_module_init(void) +{ + int err; + + err = nft_register_expr(&nft_nat_type); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_nat_module_exit(void) +{ + nft_unregister_expr(&nft_nat_type); +} + +module_init(nft_nat_module_init); +module_exit(nft_nat_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); +MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c new file mode 100644 index 000000000000..a2aeb318678f --- /dev/null +++ b/net/netfilter/nft_payload.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +static void nft_payload_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + int offset; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = pkt->xt.thoff; + break; + default: + BUG(); + } + offset += priv->offset; + + if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, +}; + +static int nft_payload_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload *priv = nft_expr_priv(expr); + int err; + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_payload_type; +static const struct nft_expr_ops nft_payload_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, +}; + +const struct nft_expr_ops nft_payload_fast_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, +}; + +static const struct nft_expr_ops * +nft_payload_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_payload_bases base; + unsigned int offset, len; + + if (tb[NFTA_PAYLOAD_DREG] == NULL || + tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || + tb[NFTA_PAYLOAD_LEN] == NULL) + return ERR_PTR(-EINVAL); + + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (base) { + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data)) + return ERR_PTR(-EINVAL); + + if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) + return &nft_payload_fast_ops; + else + return &nft_payload_ops; +} + +static struct nft_expr_type nft_payload_type __read_mostly = { + .name = "payload", + .select_ops = nft_payload_select_ops, + .policy = nft_payload_policy, + .maxattr = NFTA_PAYLOAD_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_payload_module_init(void) +{ + return nft_register_expr(&nft_payload_type); +} + +void nft_payload_module_exit(void) +{ + nft_unregister_expr(&nft_payload_type); +} diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c new file mode 100644 index 000000000000..ca0c1b231bfe --- /dev/null +++ b/net/netfilter/nft_rbtree.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +struct nft_rbtree { + struct rb_root root; +}; + +struct nft_rbtree_elem { + struct rb_node node; + u16 flags; + struct nft_data key; + struct nft_data data[]; +}; + +static bool nft_rbtree_lookup(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct nft_rbtree_elem *rbe, *interval = NULL; + const struct rb_node *parent = priv->root.rb_node; + int d; + + while (parent != NULL) { + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + d = nft_data_cmp(&rbe->key, key, set->klen); + if (d < 0) { + parent = parent->rb_left; + interval = rbe; + } else if (d > 0) + parent = parent->rb_right; + else { +found: + if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + goto out; + if (set->flags & NFT_SET_MAP) + nft_data_copy(data, rbe->data); + return true; + } + } + + if (set->flags & NFT_SET_INTERVAL && interval != NULL) { + rbe = interval; + goto found; + } +out: + return false; +} + +static void nft_rbtree_elem_destroy(const struct nft_set *set, + struct nft_rbtree_elem *rbe) +{ + nft_data_uninit(&rbe->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(rbe->data, set->dtype); + kfree(rbe); +} + +static int __nft_rbtree_insert(const struct nft_set *set, + struct nft_rbtree_elem *new) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *parent, **p; + int d; + + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + d = nft_data_cmp(&rbe->key, &new->key, set->klen); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else + return -EEXIST; + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + return 0; +} + +static int nft_rbtree_insert(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe; + unsigned int size; + int err; + + size = sizeof(*rbe); + if (set->flags & NFT_SET_MAP) + size += sizeof(rbe->data[0]); + + rbe = kzalloc(size, GFP_KERNEL); + if (rbe == NULL) + return -ENOMEM; + + rbe->flags = elem->flags; + nft_data_copy(&rbe->key, &elem->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(rbe->data, &elem->data); + + err = __nft_rbtree_insert(set, rbe); + if (err < 0) + kfree(rbe); + return err; +} + +static void nft_rbtree_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe = elem->cookie; + + rb_erase(&rbe->node, &priv->root); + kfree(rbe); +} + +static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct rb_node *parent = priv->root.rb_node; + struct nft_rbtree_elem *rbe; + int d; + + while (parent != NULL) { + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + if (d < 0) + parent = parent->rb_left; + else if (d > 0) + parent = parent->rb_right; + else { + elem->cookie = rbe; + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem->data, rbe->data); + elem->flags = rbe->flags; + return 0; + } + } + return -ENOENT; +} + +static void nft_rbtree_walk(const struct nft_ctx *ctx, + const struct nft_set *set, + struct nft_set_iter *iter) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct nft_rbtree_elem *rbe; + struct nft_set_elem elem; + struct rb_node *node; + + for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + if (iter->count < iter->skip) + goto cont; + + rbe = rb_entry(node, struct nft_rbtree_elem, node); + nft_data_copy(&elem.key, &rbe->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem.data, rbe->data); + elem.flags = rbe->flags; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; + } +} + +static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_rbtree); +} + +static int nft_rbtree_init(const struct nft_set *set, + const struct nlattr * const nla[]) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + priv->root = RB_ROOT; + return 0; +} + +static void nft_rbtree_destroy(const struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *node; + + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + rbe = rb_entry(node, struct nft_rbtree_elem, node); + nft_rbtree_elem_destroy(set, rbe); + } +} + +static struct nft_set_ops nft_rbtree_ops __read_mostly = { + .privsize = nft_rbtree_privsize, + .init = nft_rbtree_init, + .destroy = nft_rbtree_destroy, + .insert = nft_rbtree_insert, + .remove = nft_rbtree_remove, + .get = nft_rbtree_get, + .lookup = nft_rbtree_lookup, + .walk = nft_rbtree_walk, + .features = NFT_SET_INTERVAL | NFT_SET_MAP, + .owner = THIS_MODULE, +}; + +static int __init nft_rbtree_module_init(void) +{ + return nft_register_set(&nft_rbtree_ops); +} + +static void __exit nft_rbtree_module_exit(void) +{ + nft_unregister_set(&nft_rbtree_ops); +} + +module_init(nft_rbtree_module_init); +module_exit(nft_rbtree_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_SET(); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 568c7699abf1..3f224d7795f5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4668,7 +4668,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, return NF_ACCEPT; } -static unsigned int selinux_ipv4_forward(unsigned int hooknum, +static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4678,7 +4678,7 @@ static unsigned int selinux_ipv4_forward(unsigned int hooknum, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_forward(unsigned int hooknum, +static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4710,7 +4710,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_output(unsigned int hooknum, +static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4837,7 +4837,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, return NF_ACCEPT; } -static unsigned int selinux_ipv4_postroute(unsigned int hooknum, +static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4847,7 +4847,7 @@ static unsigned int selinux_ipv4_postroute(unsigned int hooknum, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_postroute(unsigned int hooknum, +static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, |