summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2020-06-02 11:50:23 -0700
committerAlexei Starovoitov <ast@kernel.org>2020-06-02 11:51:15 -0700
commitf371c923505010b28c8a76f7d7973c014a1d0f5c (patch)
tree47088e7997d1baf81e988db64874164ff5bbe234
parent9a25c1df24a6fea9dc79eec950453c4e00f707fd (diff)
parentc4ba153b6501fa7ccfdc7e57946fb1d6011e36e8 (diff)
Merge branch 'csum-fixes'
Daniel Borkmann says: ==================== This series fixes an issue originally reported by Lorenz Bauer where using the bpf_skb_adjust_room() helper hid a checksum bug since it wasn't adjusting CHECKSUM_UNNECESSARY's skb->csum_level after decap. The fix is two-fold: i) We do a safe reset in bpf_skb_adjust_room() to CHECKSUM_NONE with an opt- out flag BPF_F_ADJ_ROOM_NO_CSUM_RESET. ii) We add a new bpf_csum_level() for the latter in order to allow users to manually inc/dec the skb->csum_level when needed. The series is rebased against latest bpf-next tree. It can be applied there, or to bpf after the merge win sync from net-next. Thanks! ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/linux/skbuff.h8
-rw-r--r--include/uapi/linux/bpf.h51
-rw-r--r--net/core/filter.c46
-rw-r--r--tools/include/uapi/linux/bpf.h51
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c9
5 files changed, 158 insertions, 7 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a0d5c2760103..0c0377fc00c2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3919,6 +3919,14 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
}
}
+static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb)
+{
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_level = 0;
+ }
+}
+
/* Check if we need to perform checksum complete validation.
*
* Returns true if checksum complete is needed, false otherwise
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b9ed9f14f2a2..c65b374a5090 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1635,6 +1635,13 @@ union bpf_attr {
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
*
+ * By default, the helper will reset any offloaded checksum
+ * indicator of the skb to CHECKSUM_NONE. This can be avoided
+ * by the following flag:
+ *
+ * * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
+ * checksum data of the skb to CHECKSUM_NONE.
+ *
* There are two supported modes at this time:
*
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
@@ -3213,6 +3220,38 @@ union bpf_attr {
* calculation.
* Return
* Requested value, or 0, if flags are not recognized.
+ *
+ * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * Description
+ * Change the skbs checksum level by one layer up or down, or
+ * reset it entirely to none in order to have the stack perform
+ * checksum validation. The level is applicable to the following
+ * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ * through **bpf_skb_adjust_room**\ () helper with passing in
+ * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ * the UDP header is removed. Similarly, an encap of the latter
+ * into the former could be accompanied by a helper call to
+ * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ * skb is still intended to be processed in higher layers of the
+ * stack instead of just egressing at tc.
+ *
+ * There are three supported level settings at this time:
+ *
+ * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ * with CHECKSUM_UNNECESSARY.
+ * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ * with CHECKSUM_UNNECESSARY.
+ * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ * sets CHECKSUM_NONE to force checksum validation by the stack.
+ * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ * skb->csum_level.
+ * Return
+ * 0 on success, or a negative error in case of failure. In the
+ * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ * is returned or the error code -EACCES in case the skb is not
+ * subject to CHECKSUM_UNNECESSARY.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3349,7 +3388,8 @@ union bpf_attr {
FN(ringbuf_reserve), \
FN(ringbuf_submit), \
FN(ringbuf_discard), \
- FN(ringbuf_query),
+ FN(ringbuf_query), \
+ FN(csum_level),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3426,6 +3466,14 @@ enum {
BPF_F_CURRENT_NETNS = (-1L),
};
+/* BPF_FUNC_csum_level level values. */
+enum {
+ BPF_CSUM_LEVEL_QUERY,
+ BPF_CSUM_LEVEL_INC,
+ BPF_CSUM_LEVEL_DEC,
+ BPF_CSUM_LEVEL_RESET,
+};
+
/* BPF_FUNC_skb_adjust_room flags. */
enum {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
@@ -3433,6 +3481,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
};
enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index ae82bcb03124..d01a244b5087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2015,6 +2015,40 @@ static const struct bpf_func_proto bpf_csum_update_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
+{
+ /* The interface is to be used in combination with bpf_skb_adjust_room()
+ * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
+ * is passed as flags, for example.
+ */
+ switch (level) {
+ case BPF_CSUM_LEVEL_INC:
+ __skb_incr_checksum_unnecessary(skb);
+ break;
+ case BPF_CSUM_LEVEL_DEC:
+ __skb_decr_checksum_unnecessary(skb);
+ break;
+ case BPF_CSUM_LEVEL_RESET:
+ __skb_reset_checksum_unnecessary(skb);
+ break;
+ case BPF_CSUM_LEVEL_QUERY:
+ return skb->ip_summed == CHECKSUM_UNNECESSARY ?
+ skb->csum_level : -EACCES;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_csum_level_proto = {
+ .func = bpf_csum_level,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
return dev_forward_skb(dev, skb);
@@ -3113,7 +3147,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
{
int ret;
- if (flags & ~BPF_F_ADJ_ROOM_FIXED_GSO)
+ if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
return -EINVAL;
if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
@@ -3163,7 +3198,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32 off;
int ret;
- if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
+ if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
return -EINVAL;
if (unlikely(len_diff_abs > 0xfffU))
return -EFAULT;
@@ -3191,6 +3227,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
bpf_skb_net_grow(skb, off, len_diff_abs, flags);
+ if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
+ __skb_reset_checksum_unnecessary(skb);
bpf_compute_data_pointers(skb);
return ret;
@@ -6276,6 +6314,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_csum_diff_proto;
case BPF_FUNC_csum_update:
return &bpf_csum_update_proto;
+ case BPF_FUNC_csum_level:
+ return &bpf_csum_level_proto;
case BPF_FUNC_l3_csum_replace:
return &bpf_l3_csum_replace_proto;
case BPF_FUNC_l4_csum_replace:
@@ -6609,6 +6649,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skb_store_bytes_proto;
case BPF_FUNC_csum_update:
return &bpf_csum_update_proto;
+ case BPF_FUNC_csum_level:
+ return &bpf_csum_level_proto;
case BPF_FUNC_l3_csum_replace:
return &bpf_l3_csum_replace_proto;
case BPF_FUNC_l4_csum_replace:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b9ed9f14f2a2..c65b374a5090 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1635,6 +1635,13 @@ union bpf_attr {
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
*
+ * By default, the helper will reset any offloaded checksum
+ * indicator of the skb to CHECKSUM_NONE. This can be avoided
+ * by the following flag:
+ *
+ * * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
+ * checksum data of the skb to CHECKSUM_NONE.
+ *
* There are two supported modes at this time:
*
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
@@ -3213,6 +3220,38 @@ union bpf_attr {
* calculation.
* Return
* Requested value, or 0, if flags are not recognized.
+ *
+ * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * Description
+ * Change the skbs checksum level by one layer up or down, or
+ * reset it entirely to none in order to have the stack perform
+ * checksum validation. The level is applicable to the following
+ * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ * through **bpf_skb_adjust_room**\ () helper with passing in
+ * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ * the UDP header is removed. Similarly, an encap of the latter
+ * into the former could be accompanied by a helper call to
+ * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ * skb is still intended to be processed in higher layers of the
+ * stack instead of just egressing at tc.
+ *
+ * There are three supported level settings at this time:
+ *
+ * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ * with CHECKSUM_UNNECESSARY.
+ * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ * with CHECKSUM_UNNECESSARY.
+ * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ * sets CHECKSUM_NONE to force checksum validation by the stack.
+ * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ * skb->csum_level.
+ * Return
+ * 0 on success, or a negative error in case of failure. In the
+ * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ * is returned or the error code -EACCES in case the skb is not
+ * subject to CHECKSUM_UNNECESSARY.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3349,7 +3388,8 @@ union bpf_attr {
FN(ringbuf_reserve), \
FN(ringbuf_submit), \
FN(ringbuf_discard), \
- FN(ringbuf_query),
+ FN(ringbuf_query), \
+ FN(csum_level),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3426,6 +3466,14 @@ enum {
BPF_F_CURRENT_NETNS = (-1L),
};
+/* BPF_FUNC_csum_level level values. */
+enum {
+ BPF_CSUM_LEVEL_QUERY,
+ BPF_CSUM_LEVEL_INC,
+ BPF_CSUM_LEVEL_DEC,
+ BPF_CSUM_LEVEL_RESET,
+};
+
/* BPF_FUNC_skb_adjust_room flags. */
enum {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
@@ -3433,6 +3481,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
};
enum {
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 1668b993eb86..f0b72e86bee5 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -380,9 +380,10 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
}
if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
- BPF_F_ADJ_ROOM_FIXED_GSO)) {
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
return TC_ACT_SHOT;
- }
return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
}
@@ -472,7 +473,9 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
}
if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
- BPF_F_ADJ_ROOM_FIXED_GSO)) {
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
metrics->errors_total_encap_adjust_failed++;
return TC_ACT_SHOT;
}