From 4929a932be334d68d333089872bc67e4f1d97475 Mon Sep 17 00:00:00 2001
From: Tuong Lien
Date: Wed, 24 Jul 2019 08:56:11 +0700
Subject: tipc: optimize link synching mechanism

This commit, along with the next one, resolves the issues with the
link changeover mechanism. See that commit for details.

Basically, for the link synching, from now on, we will send only a
single ("dummy") SYNCH message to the peer. The SYNCH message does not
contain any data, just a header conveying the synch point to the peer.

A new node capability flag ("TIPC_TUNNEL_ENHANCED") is introduced to
preserve backward compatibility.

Acked-by: Ying Xue
Acked-by: Jon Maloy
Suggested-by: Jon Maloy
Signed-off-by: Tuong Lien
Signed-off-by: David S. Miller
---
 net/tipc/node.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net/tipc/node.c')

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3a5be1d7e572..7ca019001f7c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1649,7 +1649,6 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 	int usr = msg_user(hdr);
 	int mtyp = msg_type(hdr);
 	u16 oseqno = msg_seqno(hdr);
-	u16 iseqno = msg_seqno(msg_inner_hdr(hdr));
 	u16 exp_pkts = msg_msgcnt(hdr);
 	u16 rcv_nxt, syncpt, dlv_nxt, inputq_len;
 	int state = n->state;
@@ -1748,7 +1747,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 
 	/* Initiate synch mode if applicable */
 	if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) {
-		syncpt = iseqno + exp_pkts - 1;
+		if (n->capabilities & TIPC_TUNNEL_ENHANCED)
+			syncpt = msg_syncpt(hdr);
+		else
+			syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1;
 		if (!tipc_link_is_up(l))
 			__tipc_node_link_up(n, bearer_id, xmitq);
 		if (n->state == SELF_UP_PEER_UP) {
--
cgit v1.2.3
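To make the capability fallback above concrete, here is a minimal
user-space sketch of the receiver-side synch point selection. The
struct layout and the capability bit value are illustrative stand-ins,
not TIPC's actual definitions; only the selection logic mirrors the
patched tipc_node_check_state():

#include <stdint.h>
#include <stdio.h>

#define TIPC_TUNNEL_ENHANCED (1u << 9)	/* illustrative bit value only */

struct synch_hdr {
	uint16_t syncpt;	/* synch point, carried directly (enhanced peer) */
	uint16_t inner_seqno;	/* seqno of first tunnelled packet (legacy peer) */
	uint16_t msgcnt;	/* number of tunnelled packets (legacy peer) */
};

/* Mirror of the selection in the patched tipc_node_check_state(): an
 * enhanced peer conveys the synch point explicitly in the dummy SYNCH
 * header; for a legacy peer it is derived from the tunnelled messages.
 */
static uint16_t pick_syncpt(uint16_t capabilities, const struct synch_hdr *hdr)
{
	if (capabilities & TIPC_TUNNEL_ENHANCED)
		return hdr->syncpt;
	return (uint16_t)(hdr->inner_seqno + hdr->msgcnt - 1);
}

int main(void)
{
	struct synch_hdr hdr = { .syncpt = 42, .inner_seqno = 40, .msgcnt = 3 };

	/* Both schemes agree on the same synch point: 42 */
	printf("enhanced: %u\n", pick_syncpt(TIPC_TUNNEL_ENHANCED, &hdr));
	printf("legacy:   %u\n", pick_syncpt(0, &hdr));
	return 0;
}

Both paths print 42, which is the point of the fallback: an enhanced
peer reads the synch point directly, while a legacy peer reconstructs
the same value from the inner sequence number and the message count.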
From 6c9081a3915dc0782a8f1424343b794f2cf53d9c Mon Sep 17 00:00:00 2001
From: John Rutherford
Date: Wed, 7 Aug 2019 12:52:29 +1000
Subject: tipc: add loopback device tracking

Since node internal messages are passed directly to the socket, it is
not possible to observe those messages via tcpdump or Wireshark. We now
remedy this by making it possible to clone such messages and send the
clones to the loopback interface. The clones are dropped at reception
and have no functional role except making the traffic visible. The
feature is enabled if network taps are active for the loopback device.
Pcap filtering restrictions require the messages to be presented to the
receiving side of the loopback device.

v3:
 - Function dev_nit_active() used to check for network taps.
 - Procedure netif_rx_ni() used to send cloned messages to the
   loopback device.

Signed-off-by: John Rutherford
Acked-by: Jon Maloy
Acked-by: Ying Xue
Signed-off-by: David S. Miller
---
 net/tipc/bcast.c  |  4 +++-
 net/tipc/bearer.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/tipc/bearer.h | 10 +++++++++
 net/tipc/core.c   |  5 +++++
 net/tipc/core.h   |  3 +++
 net/tipc/node.c   |  1 +
 net/tipc/topsrv.c |  2 ++
 7 files changed, 88 insertions(+), 1 deletion(-)

(limited to 'net/tipc/node.c')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 1336f3cdad38..34f3e5641438 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -406,8 +406,10 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
 			rc = tipc_bcast_xmit(net, pkts, cong_link_cnt);
 	}
 
-	if (dests->local)
+	if (dests->local) {
+		tipc_loopback_trace(net, &localq);
 		tipc_sk_mcast_rcv(net, &localq, &inputq);
+	}
 exit:
 	/* This queue should normally be empty by now */
 	__skb_queue_purge(pkts);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a809c0ec8d15..0214aa1c4427 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -389,6 +389,11 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
 		dev_put(dev);
 		return -EINVAL;
 	}
+	if (dev == net->loopback_dev) {
+		dev_put(dev);
+		pr_info("Enabling <%s> not permitted\n", b->name);
+		return -EINVAL;
+	}
 
 	/* Autoconfigure own node identity if needed */
 	if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) {
@@ -674,6 +679,65 @@ void tipc_bearer_stop(struct net *net)
 	}
 }
 
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts)
+{
+	struct net_device *dev = net->loopback_dev;
+	struct sk_buff *skb, *_skb;
+	int exp;
+
+	skb_queue_walk(pkts, _skb) {
+		skb = pskb_copy(_skb, GFP_ATOMIC);
+		if (!skb)
+			continue;
+
+		exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+		if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) {
+			kfree_skb(skb);
+			continue;
+		}
+
+		skb_reset_network_header(skb);
+		dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr,
+				dev->dev_addr, skb->len);
+		skb->dev = dev;
+		skb->pkt_type = PACKET_HOST;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->protocol = eth_type_trans(skb, dev);
+		netif_rx_ni(skb);
+	}
+}
+
+static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev,
+				 struct packet_type *pt, struct net_device *od)
+{
+	consume_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
+int tipc_attach_loopback(struct net *net)
+{
+	struct net_device *dev = net->loopback_dev;
+	struct tipc_net *tn = tipc_net(net);
+
+	if (!dev)
+		return -ENODEV;
+
+	dev_hold(dev);
+	tn->loopback_pt.dev = dev;
+	tn->loopback_pt.type = htons(ETH_P_TIPC);
+	tn->loopback_pt.func = tipc_loopback_rcv_pkt;
+	dev_add_pack(&tn->loopback_pt);
+	return 0;
+}
+
+void tipc_detach_loopback(struct net *net)
+{
+	struct tipc_net *tn = tipc_net(net);
+
+	dev_remove_pack(&tn->loopback_pt);
+	dev_put(net->loopback_dev);
+}
+
 /* Caller should hold rtnl_lock to protect the bearer */
 static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
 				struct tipc_bearer *bearer, int nlflags)
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 7f4c569594a5..ea0f3c49cbed 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -232,6 +232,16 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
 		      struct tipc_media_addr *dst);
 void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
 			 struct sk_buff_head *xmitq);
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts);
+int tipc_attach_loopback(struct net *net);
+void tipc_detach_loopback(struct net *net);
+
+static inline void tipc_loopback_trace(struct net *net,
+				       struct sk_buff_head *pkts)
+{
+	if (unlikely(dev_nit_active(net->loopback_dev)))
+		tipc_clone_to_loopback(net, pkts);
+}
 
 /* check if device MTU is too low for tipc headers */
 static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve)
diff --git a/net/tipc/core.c b/net/tipc/core.c
index c8370722f0bb..23cb379a93d6 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -82,6 +82,10 @@ static int __net_init tipc_init_net(struct net *net)
 	if (err)
 		goto out_bclink;
 
+	err = tipc_attach_loopback(net);
+	if (err)
+		goto out_bclink;
+
 	return 0;
 
 out_bclink:
@@ -94,6 +98,7 @@ out_sk_rht:
 
 static void __net_exit tipc_exit_net(struct net *net)
 {
+	tipc_detach_loopback(net);
 	tipc_net_stop(net);
 	tipc_bcast_stop(net);
 	tipc_nametbl_stop(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 7a68e1b6a066..60d829581068 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -125,6 +125,9 @@ struct tipc_net {
 
 	/* Cluster capabilities */
 	u16 capabilities;
+
+	/* Tracing of node internal messages */
+	struct packet_type loopback_pt;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 7ca019001f7c..1bdcf0fc1a4d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1443,6 +1443,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
 	int rc;
 
 	if (in_own_node(net, dnode)) {
+		tipc_loopback_trace(net, list);
 		tipc_sk_rcv(net, list);
 		return 0;
 	}
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index ca8ac96d22a9..3a12fc18239b 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -40,6 +40,7 @@
 #include "socket.h"
 #include "addr.h"
 #include "msg.h"
+#include "bearer.h"
 #include <net/sock.h>
 #include <linux/module.h>
 
@@ -608,6 +609,7 @@ static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
 	memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
 	skb_queue_head_init(&evtq);
 	__skb_queue_tail(&evtq, skb);
+	tipc_loopback_trace(net, &evtq);
 	tipc_sk_rcv(net, &evtq);
 }
 
--
cgit v1.2.3
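A minimal user-space model of the gating idea above; dev_nit_active(),
tipc_clone_to_loopback() and tipc_sk_rcv() are represented by stand-in
functions here, nothing in this sketch is kernel API:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for dev_nit_active(): true while at least one tap (e.g. a
 * tcpdump instance on the loopback device) is attached.
 */
static bool taps_active;

/* Stand-in for tipc_clone_to_loopback(): the observation path */
static void clone_to_loopback(const char *msg)
{
	printf("tap sees:    %s\n", msg);
}

/* Stand-in for the tipc_loopback_trace() + tipc_sk_rcv() pairing: clone
 * for observers only when someone is listening, then deliver as before.
 */
static void deliver_node_local(const char *msg)
{
	if (taps_active)
		clone_to_loopback(msg);
	printf("socket gets: %s\n", msg);
}

int main(void)
{
	deliver_node_local("msg-1");	/* no tap: invisible to tcpdump */
	taps_active = true;		/* a tap on 'lo' attaches */
	deliver_node_local("msg-2");	/* now also visible on the tap */
	return 0;
}

In the real patch the clones are consumed by tipc_loopback_rcv_pkt()
at reception, so with no tap attached the send path pays only the cost
of the unlikely() branch in tipc_loopback_trace().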
From e654f9f53b45fde3fcc8051830b212c7a8f36148 Mon Sep 17 00:00:00 2001
From: Jon Maloy
Date: Thu, 15 Aug 2019 16:42:50 +0200
Subject: tipc: clean up skb list lock handling on send path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The policy for handling the skb list locks on the send and receive
paths is simple.

- On the send path we never need to grab the lock on the 'xmitq' list
  when the destination is an external node.
- On the receive path we always need to grab the lock on the 'inputq'
  list, irrespective of source node.

However, when transmitting node local messages those will eventually
end up on the receive path of a local socket, meaning that the
argument 'xmitq' in tipc_node_xmit() will become the 'inputq' argument
in the function tipc_sk_rcv(). This has been handled by always
initializing the spinlock of the 'xmitq' list at message creation,
just in case it may end up on the receive path later, and despite
knowing that the lock in most cases never will be used.

This approach is inaccurate and confusing, and has also concealed the
fact that the stated 'no lock grabbing' policy for the send path is
violated in some cases.

We now clean this up by never initializing the lock at message
creation, instead doing this at the moment we find that the message
actually will enter the receive path. At the same time we fix the four
locations where we incorrectly access the spinlock on the send/error
path.

This patch also reverts commit d12cffe9329f ("tipc: ensure head->lock
is initialised") which has now become redundant.
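As an illustration, a minimal sketch of the resulting send-path
pattern, assuming TIPC's internal declarations (tipc_sk_rcv() et al.)
are in scope; this is a simplified shape of tipc_node_xmit(), not the
function itself:

#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>

/* Sketch: the sender builds 'list' with the lockless
 * __skb_queue_head_init(), so the queue's spinlock starts out
 * uninitialized.  It is initialized only when we know the queue is
 * about to become an 'inputq' on the receive path.
 */
static int example_node_xmit(struct net *net, struct sk_buff_head *list,
			     bool local_dest)
{
	if (local_dest) {
		/* Crossing to the receive path: tipc_sk_rcv() will grab
		 * the list lock, so initialize it here, and only here.
		 */
		spin_lock_init(&list->lock);
		tipc_sk_rcv(net, list);
		return 0;
	}

	/* External destination: the lock is never taken, so error
	 * cleanup must use the lockless purge variant.
	 */
	__skb_queue_purge(list);
	return -EHOSTUNREACH;
}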
CC: Eric Dumazet
Reported-by: Chris Packham
Acked-by: Ying Xue
Signed-off-by: Jon Maloy
Reviewed-by: Xin Long
Signed-off-by: David S. Miller
---
 net/tipc/bcast.c      | 10 +++++-----
 net/tipc/group.c      |  4 ++--
 net/tipc/link.c       | 14 +++++++-------
 net/tipc/name_distr.c |  2 +-
 net/tipc/node.c       |  7 ++++---
 net/tipc/socket.c     | 14 +++++++-------
 6 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'net/tipc/node.c')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 34f3e5641438..6ef1abdd525f 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -185,7 +185,7 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq)
 	}
 
 	/* We have to transmit across all bearers */
-	skb_queue_head_init(&_xmitq);
+	__skb_queue_head_init(&_xmitq);
 	for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
 		if (!bb->dests[bearer_id])
 			continue;
@@ -256,7 +256,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
 	struct sk_buff_head xmitq;
 	int rc = 0;
 
-	skb_queue_head_init(&xmitq);
+	__skb_queue_head_init(&xmitq);
 	tipc_bcast_lock(net);
 	if (tipc_link_bc_peers(l))
 		rc = tipc_link_xmit(l, pkts, &xmitq);
@@ -286,7 +286,7 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
 	u32 dnode, selector;
 
 	selector = msg_link_selector(buf_msg(skb_peek(pkts)));
-	skb_queue_head_init(&_pkts);
+	__skb_queue_head_init(&_pkts);
 
 	list_for_each_entry_safe(dst, tmp, &dests->list, list) {
 		dnode = dst->node;
@@ -344,7 +344,7 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
 	msg_set_size(_hdr, MCAST_H_SIZE);
 	msg_set_is_rcast(_hdr, !msg_is_rcast(hdr));
 
-	skb_queue_head_init(&tmpq);
+	__skb_queue_head_init(&tmpq);
 	__skb_queue_tail(&tmpq, _skb);
 	if (method->rcast)
 		tipc_bcast_xmit(net, &tmpq, cong_link_cnt);
@@ -378,7 +378,7 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
 	int rc = 0;
 
 	skb_queue_head_init(&inputq);
-	skb_queue_head_init(&localq);
+	__skb_queue_head_init(&localq);
 
 	/* Clone packets before they are consumed by next call */
 	if (dests->local && !tipc_msg_reassemble(pkts, &localq)) {
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 5f98d38bcf08..89257e2a980d 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -199,7 +199,7 @@ void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf)
 	struct tipc_member *m, *tmp;
 	struct sk_buff_head xmitq;
 
-	skb_queue_head_init(&xmitq);
+	__skb_queue_head_init(&xmitq);
 	rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
 		tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq);
 		tipc_group_update_member(m, 0);
@@ -435,7 +435,7 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
 		return true;
 	if (state == MBR_PENDING && adv == ADV_IDLE)
 		return true;
-	skb_queue_head_init(&xmitq);
+	__skb_queue_head_init(&xmitq);
 	tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
 	tipc_node_distr_xmit(grp->net, &xmitq);
 	return true;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index dd3155b14654..289e848084ac 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -959,7 +959,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 		pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
 			skb_queue_len(list), msg_user(hdr),
 			msg_type(hdr), msg_size(hdr), mtu);
-		skb_queue_purge(list);
+		__skb_queue_purge(list);
 		return -EMSGSIZE;
 	}
 
@@ -988,7 +988,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 		if (likely(skb_queue_len(transmq) < maxwin)) {
 			_skb = skb_clone(skb, GFP_ATOMIC);
 			if (!_skb) {
-				skb_queue_purge(list);
+				__skb_queue_purge(list);
 				return -ENOBUFS;
 			}
 			__skb_dequeue(list);
@@ -1668,7 +1668,7 @@ void tipc_link_create_dummy_tnl_msg(struct tipc_link *l,
 	struct sk_buff *skb;
 	u32 dnode = l->addr;
 
-	skb_queue_head_init(&tnlq);
+	__skb_queue_head_init(&tnlq);
 	skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE,
 			      BASIC_H_SIZE, dnode, onode, 0, 0, 0);
@@ -1708,9 +1708,9 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
 	if (!tnl)
 		return;
 
-	skb_queue_head_init(&tnlq);
-	skb_queue_head_init(&tmpxq);
-	skb_queue_head_init(&frags);
+	__skb_queue_head_init(&tnlq);
+	__skb_queue_head_init(&tmpxq);
+	__skb_queue_head_init(&frags);
 
 	/* At least one packet required for safe algorithm => add dummy */
 	skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
@@ -1720,7 +1720,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
 		pr_warn("%sunable to create tunnel packet\n", link_co_err);
 		return;
 	}
-	skb_queue_tail(&tnlq, skb);
+	__skb_queue_tail(&tnlq, skb);
 	tipc_link_xmit(l, &tnlq, &tmpxq);
 	__skb_queue_purge(&tmpxq);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 44abc8e9c990..61219f0b9677 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -190,7 +190,7 @@ void tipc_named_node_up(struct net *net, u32 dnode)
 	struct name_table *nt = tipc_name_table(net);
 	struct sk_buff_head head;
 
-	skb_queue_head_init(&head);
+	__skb_queue_head_init(&head);
 
 	read_lock_bh(&nt->cluster_scope_lock);
 	named_distribute(net, &head, dnode, &nt->cluster_scope);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 1bdcf0fc1a4d..c8f6177dd5a2 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1444,13 +1444,14 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
 
 	if (in_own_node(net, dnode)) {
 		tipc_loopback_trace(net, list);
+		spin_lock_init(&list->lock);
 		tipc_sk_rcv(net, list);
 		return 0;
 	}
 
 	n = tipc_node_find(net, dnode);
 	if (unlikely(!n)) {
-		skb_queue_purge(list);
+		__skb_queue_purge(list);
 		return -EHOSTUNREACH;
 	}
 
@@ -1459,7 +1460,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
 	if (unlikely(bearer_id == INVALID_BEARER_ID)) {
 		tipc_node_read_unlock(n);
 		tipc_node_put(n);
-		skb_queue_purge(list);
+		__skb_queue_purge(list);
 		return -EHOSTUNREACH;
 	}
 
@@ -1491,7 +1492,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
 {
 	struct sk_buff_head head;
 
-	skb_queue_head_init(&head);
+	__skb_queue_head_init(&head);
 	__skb_queue_tail(&head, skb);
 	tipc_node_xmit(net, &head, dnode, selector);
 	return 0;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 83ae41d7e554..3b9f8cc328f5 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -809,7 +809,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
 	msg_set_nameupper(hdr, seq->upper);
 
 	/* Build message as chain of buffers */
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
 
 	/* Send message if build was successful */
@@ -853,7 +853,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
 	msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
 
 	/* Build message as chain of buffers */
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
 	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
 	if (unlikely(rc != dlen))
@@ -1058,7 +1058,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
 	msg_set_grp_bc_ack_req(hdr, ack);
 
 	/* Build message as chain of buffers */
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
 	if (unlikely(rc != dlen))
 		return rc;
@@ -1387,7 +1387,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	if (unlikely(rc))
 		return rc;
 
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
 	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
 	if (unlikely(rc != dlen))
@@ -1445,7 +1445,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
 	int send, sent = 0;
 	int rc = 0;
 
-	skb_queue_head_init(&pkts);
+	__skb_queue_head_init(&pkts);
 
 	if (unlikely(dlen > INT_MAX))
 		return -EMSGSIZE;
@@ -1805,7 +1805,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
 
 	/* Send group flow control advertisement when applicable */
 	if (tsk->group && msg_in_group(hdr) && !grp_evt) {
-		skb_queue_head_init(&xmitq);
+		__skb_queue_head_init(&xmitq);
 		tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
 					  msg_orignode(hdr), msg_origport(hdr),
 					  &xmitq);
@@ -2674,7 +2674,7 @@ static void tipc_sk_timeout(struct timer_list *t)
 	struct sk_buff_head list;
 	int rc = 0;
 
-	skb_queue_head_init(&list);
+	__skb_queue_head_init(&list);
 	bh_lock_sock(sk);
 
 	/* Try again later if socket is busy */
--
cgit v1.2.3