summaryrefslogtreecommitdiff
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c11
-rw-r--r--net/sunrpc/clnt.c12
-rw-r--r--net/sunrpc/rpcb_clnt.c7
-rw-r--r--net/sunrpc/sched.c5
-rw-r--r--net/sunrpc/svc.c9
-rw-r--r--net/sunrpc/svc_xprt.c76
-rw-r--r--net/sunrpc/svcauth_unix.c9
-rw-r--r--net/sunrpc/svcsock.c26
-rw-r--r--net/sunrpc/xprt.c18
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c209
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c39
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c14
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c114
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c111
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c105
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c15
-rw-r--r--net/sunrpc/xprtrdma/transport.c6
-rw-r--r--net/sunrpc/xprtrdma/verbs.c131
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h29
-rw-r--r--net/sunrpc/xprtsock.c9
21 files changed, 450 insertions, 509 deletions
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index bd4678db9d76..6dff64374bfe 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1825,11 +1825,14 @@ static int
svcauth_gss_release(struct svc_rqst *rqstp)
{
struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
- struct rpc_gss_wire_cred *gc = &gsd->clcred;
+ struct rpc_gss_wire_cred *gc;
struct xdr_buf *resbuf = &rqstp->rq_res;
int stat = -EINVAL;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
+ if (!gsd)
+ goto out;
+ gc = &gsd->clcred;
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
/* Release can be called twice, but we only wrap once. */
@@ -1870,10 +1873,10 @@ out_err:
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
- if (gsd->rsci)
+ if (gsd && gsd->rsci) {
cache_put(&gsd->rsci->h, sn->rsc_cache);
- gsd->rsci = NULL;
-
+ gsd->rsci = NULL;
+ }
return stat;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 612f0a641f4c..f555d335e910 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1799,7 +1799,6 @@ call_allocate(struct rpc_task *task)
status = xprt->ops->buf_alloc(task);
trace_rpc_buf_alloc(task, status);
- xprt_inject_disconnect(xprt);
if (status == 0)
return;
if (status != -ENOMEM) {
@@ -2458,12 +2457,6 @@ call_decode(struct rpc_task *task)
}
/*
- * Ensure that we see all writes made by xprt_complete_rqst()
- * before it changed req->rq_reply_bytes_recvd.
- */
- smp_rmb();
-
- /*
* Did we ever call xprt_complete_rqst()? If not, we should assume
* the message is incomplete.
*/
@@ -2471,6 +2464,11 @@ call_decode(struct rpc_task *task)
if (!req->rq_reply_bytes_recvd)
goto out;
+ /* Ensure that we see all writes made by xprt_complete_rqst()
+ * before it changed req->rq_reply_bytes_recvd.
+ */
+ smp_rmb();
+
req->rq_rcv_buf.len = req->rq_private_buf.len;
trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 38fe2ce8a5aa..647b323cc1d5 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -344,13 +344,15 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
const char *hostname,
struct sockaddr *srvaddr, size_t salen,
int proto, u32 version,
- const struct cred *cred)
+ const struct cred *cred,
+ const struct rpc_timeout *timeo)
{
struct rpc_create_args args = {
.net = net,
.protocol = proto,
.address = srvaddr,
.addrsize = salen,
+ .timeout = timeo,
.servername = hostname,
.nodename = nodename,
.program = &rpcb_program,
@@ -705,7 +707,8 @@ void rpcb_getport_async(struct rpc_task *task)
clnt->cl_nodename,
xprt->servername, sap, salen,
xprt->prot, bind_version,
- clnt->cl_cred);
+ clnt->cl_cred,
+ task->tk_client->cl_timeout);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
goto bailout_nofree;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cf702a5f7fe5..39ed0e0afe6d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -963,8 +963,11 @@ void rpc_execute(struct rpc_task *task)
rpc_set_active(task);
rpc_make_runnable(rpciod_workqueue, task);
- if (!is_async)
+ if (!is_async) {
+ unsigned int pflags = memalloc_nofs_save();
__rpc_execute(task);
+ memalloc_nofs_restore(pflags);
+ }
}
static void rpc_async_schedule(struct work_struct *work)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 61fb8a18552c..0de918cb3d90 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -846,7 +846,8 @@ void
svc_rqst_free(struct svc_rqst *rqstp)
{
svc_release_buffer(rqstp);
- put_page(rqstp->rq_scratch_page);
+ if (rqstp->rq_scratch_page)
+ put_page(rqstp->rq_scratch_page);
kfree(rqstp->rq_resp);
kfree(rqstp->rq_argp);
kfree(rqstp->rq_auth_data);
@@ -1413,7 +1414,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
sendit:
if (svc_authorise(rqstp))
- goto close;
+ goto close_xprt;
return 1; /* Caller can now send it */
release_dropit:
@@ -1425,6 +1426,8 @@ release_dropit:
return 0;
close:
+ svc_authorise(rqstp);
+close_xprt:
if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
svc_close_xprt(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
@@ -1433,7 +1436,7 @@ release_dropit:
err_short_len:
svc_printk(rqstp, "short len %zd, dropping request\n",
argv->iov_len);
- goto close;
+ goto close_xprt;
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index dcc50ae54550..d66a8e44a1ae 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -139,6 +139,20 @@ int svc_print_xprts(char *buf, int maxlen)
return len;
}
+/**
+ * svc_xprt_deferred_close - Close a transport
+ * @xprt: transport instance
+ *
+ * Used in contexts that need to defer the work of shutting down
+ * the transport to an nfsd thread.
+ */
+void svc_xprt_deferred_close(struct svc_xprt *xprt)
+{
+ if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
+ svc_xprt_enqueue(xprt);
+}
+EXPORT_SYMBOL_GPL(svc_xprt_deferred_close);
+
static void svc_xprt_free(struct kref *kref)
{
struct svc_xprt *xprt =
@@ -233,21 +247,25 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return xprt;
}
-/*
- * svc_xprt_received conditionally queues the transport for processing
- * by another thread. The caller must hold the XPT_BUSY bit and must
+/**
+ * svc_xprt_received - start next receiver thread
+ * @xprt: controlling transport
+ *
+ * The caller must hold the XPT_BUSY bit and must
* not thereafter touch transport data.
*
* Note: XPT_DATA only gets cleared when a read-attempt finds no (or
* insufficient) data.
*/
-static void svc_xprt_received(struct svc_xprt *xprt)
+void svc_xprt_received(struct svc_xprt *xprt)
{
if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) {
WARN_ONCE(1, "xprt=0x%p already busy!", xprt);
return;
}
+ trace_svc_xprt_received(xprt);
+
/* As soon as we clear busy, the xprt could be closed and
* 'put', so we need a reference to call svc_enqueue_xprt with:
*/
@@ -257,6 +275,7 @@ static void svc_xprt_received(struct svc_xprt *xprt)
xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
svc_xprt_put(xprt);
}
+EXPORT_SYMBOL_GPL(svc_xprt_received);
void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
{
@@ -642,36 +661,34 @@ static void svc_check_conn_limits(struct svc_serv *serv)
static int svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
- struct xdr_buf *arg;
- int pages;
- int i;
+ struct xdr_buf *arg = &rqstp->rq_arg;
+ unsigned long pages, filled;
- /* now allocate needed pages. If we get a failure, sleep briefly */
pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
if (pages > RPCSVC_MAXPAGES) {
- pr_warn_once("svc: warning: pages=%u > RPCSVC_MAXPAGES=%lu\n",
+ pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
pages, RPCSVC_MAXPAGES);
/* use as many pages as possible */
pages = RPCSVC_MAXPAGES;
}
- for (i = 0; i < pages ; i++)
- while (rqstp->rq_pages[i] == NULL) {
- struct page *p = alloc_page(GFP_KERNEL);
- if (!p) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (signalled() || kthread_should_stop()) {
- set_current_state(TASK_RUNNING);
- return -EINTR;
- }
- schedule_timeout(msecs_to_jiffies(500));
- }
- rqstp->rq_pages[i] = p;
+
+ for (;;) {
+ filled = alloc_pages_bulk_array(GFP_KERNEL, pages,
+ rqstp->rq_pages);
+ if (filled == pages)
+ break;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signalled() || kthread_should_stop()) {
+ set_current_state(TASK_RUNNING);
+ return -EINTR;
}
- rqstp->rq_page_end = &rqstp->rq_pages[i];
- rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
+ schedule_timeout(msecs_to_jiffies(500));
+ }
+ rqstp->rq_page_end = &rqstp->rq_pages[pages];
+ rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */
/* Make arg->head point to first page and arg->pages point to rest */
- arg = &rqstp->rq_arg;
arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
arg->head[0].iov_len = PAGE_SIZE;
arg->pages = rqstp->rq_pages + 1;
@@ -801,8 +818,10 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
newxpt->xpt_cred = get_cred(xprt->xpt_cred);
svc_add_new_temp_xprt(serv, newxpt);
trace_svc_xprt_accept(newxpt, serv->sv_name);
- } else
+ } else {
module_put(xprt->xpt_class->xcl_owner);
+ }
+ svc_xprt_received(xprt);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
@@ -817,8 +836,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
}
- /* clear XPT_BUSY: */
- svc_xprt_received(xprt);
out:
trace_svc_handle_xprt(xprt, len);
return len;
@@ -1060,7 +1077,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
struct svc_xprt *xprt;
int ret = 0;
- spin_lock(&serv->sv_lock);
+ spin_lock_bh(&serv->sv_lock);
list_for_each_entry(xprt, xprt_list, xpt_list) {
if (xprt->xpt_net != net)
continue;
@@ -1068,7 +1085,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
}
- spin_unlock(&serv->sv_lock);
+ spin_unlock_bh(&serv->sv_lock);
return ret;
}
@@ -1229,6 +1246,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
rqstp->rq_xprt_hlen = dr->xprt_hlen;
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
+ svc_xprt_received(rqstp->rq_xprt);
return (dr->argslen<<2) - dr->xprt_hlen;
}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 97c0bddba7a3..35b7966ac3b3 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -303,15 +303,6 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
return NULL;
}
-static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
- struct in6_addr *addr)
-{
- struct sunrpc_net *sn;
-
- sn = net_generic(net, sunrpc_net_id);
- return __ip_map_lookup(sn->ip_map_cache, class, addr);
-}
-
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
struct unix_domain *udom, time64_t expiry)
{
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2e2f007dfc9f..478f857cdaed 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -519,6 +519,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->netudpcnt++;
+ svc_xprt_received(rqstp->rq_xprt);
return len;
out_recv_err:
@@ -527,7 +528,7 @@ out_recv_err:
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
}
trace_svcsock_udp_recv_err(&svsk->sk_xprt, err);
- return 0;
+ goto out_clear_busy;
out_cmsg_err:
net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
cmh->cmsg_level, cmh->cmsg_type);
@@ -536,6 +537,8 @@ out_bh_enable:
local_bh_enable();
out_free:
kfree_skb(skb);
+out_clear_busy:
+ svc_xprt_received(rqstp->rq_xprt);
return 0;
}
@@ -728,10 +731,8 @@ static void svc_tcp_state_change(struct sock *sk)
rmb();
svsk->sk_ostate(sk);
trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock);
- if (sk->sk_state != TCP_ESTABLISHED) {
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
- }
+ if (sk->sk_state != TCP_ESTABLISHED)
+ svc_xprt_deferred_close(&svsk->sk_xprt);
}
}
@@ -901,7 +902,7 @@ err_too_large:
net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
__func__, svsk->sk_xprt.xpt_server->sv_name,
svc_sock_reclen(svsk));
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_deferred_close(&svsk->sk_xprt);
err_short:
return -EAGAIN;
}
@@ -1035,6 +1036,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
+ svc_xprt_received(rqstp->rq_xprt);
return rqstp->rq_arg.len;
err_incomplete:
@@ -1052,13 +1054,14 @@ error:
if (len != -EAGAIN)
goto err_delete;
trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0);
- return 0;
+ goto err_noclose;
err_nuts:
svsk->sk_datalen = 0;
err_delete:
trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len);
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_deferred_close(&svsk->sk_xprt);
err_noclose:
+ svc_xprt_received(rqstp->rq_xprt);
return 0; /* record not complete */
}
@@ -1171,7 +1174,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
tcp_sock_set_cork(svsk->sk_sk, true);
err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
xdr_free_bvec(xdr);
- trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
+ trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
if (atomic_dec_and_test(&svsk->sk_sendqlen))
@@ -1188,8 +1191,7 @@ out_close:
xprt->xpt_server->sv_name,
(err < 0) ? "got error" : "sent",
(err < 0) ? err : sent, xdr->len);
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
+ svc_xprt_deferred_close(xprt);
atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
@@ -1268,7 +1270,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
case TCP_ESTABLISHED:
break;
default:
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_deferred_close(&svsk->sk_xprt);
}
}
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 691ccf8049a4..e5b5a960a69b 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -698,9 +698,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
int status = 0;
- if (time_before(jiffies, req->rq_minortimeo))
- return status;
if (time_before(jiffies, req->rq_majortimeo)) {
+ if (time_before(jiffies, req->rq_minortimeo))
+ return status;
if (to->to_exponential)
req->rq_timeout <<= 1;
else
@@ -1352,6 +1352,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
INIT_LIST_HEAD(&req->rq_xmit2);
out:
+ atomic_long_inc(&xprt->xmit_queuelen);
set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
}
@@ -1381,6 +1382,7 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
}
} else
list_del(&req->rq_xmit2);
+ atomic_long_dec(&req->rq_xprt->xmit_queuelen);
}
/**
@@ -1469,8 +1471,6 @@ bool xprt_prepare_transmit(struct rpc_task *task)
struct rpc_xprt *xprt = req->rq_xprt;
if (!xprt_lock_write(xprt, task)) {
- trace_xprt_transmit_queued(xprt, task);
-
/* Race breaker: someone may have transmitted us */
if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
rpc_wake_up_queued_task_set_status(&xprt->sending,
@@ -1483,7 +1483,10 @@ bool xprt_prepare_transmit(struct rpc_task *task)
void xprt_end_transmit(struct rpc_task *task)
{
- xprt_release_write(task->tk_rqstp->rq_xprt, task);
+ struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
+
+ xprt_inject_disconnect(xprt);
+ xprt_release_write(xprt, task);
}
/**
@@ -1537,8 +1540,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
return status;
}
- if (is_retrans)
+ if (is_retrans) {
task->tk_client->cl_stats->rpcretrans++;
+ trace_xprt_retransmit(req);
+ }
xprt_inject_disconnect(xprt);
@@ -1885,7 +1890,6 @@ void xprt_release(struct rpc_task *task)
spin_unlock(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
- xprt_inject_disconnect(xprt);
xdr_free_bvec(&req->rq_rcv_buf);
xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index a249837d6a55..1151efd09b27 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -155,9 +155,11 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_rep *rep = req->rl_reply;
struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- rpcrdma_recv_buffer_put(req->rl_reply);
+ rpcrdma_rep_put(&r_xprt->rx_buf, rep);
req->rl_reply = NULL;
spin_lock(&xprt->bc_pa_lock);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 766a1048a48a..229fcc9a9064 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -49,20 +49,13 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-/**
- * frwr_release_mr - Destroy one MR
- * @mr: MR allocated by frwr_mr_init
- *
- */
-void frwr_release_mr(struct rpcrdma_mr *mr)
+static void frwr_cid_init(struct rpcrdma_ep *ep,
+ struct rpcrdma_mr *mr)
{
- int rc;
+ struct rpc_rdma_cid *cid = &mr->mr_cid;
- rc = ib_dereg_mr(mr->frwr.fr_mr);
- if (rc)
- trace_xprtrdma_frwr_dereg(mr, rc);
- kfree(mr->mr_sg);
- kfree(mr);
+ cid->ci_queue_id = ep->re_attr.send_cq->res.id;
+ cid->ci_completion_id = mr->mr_ibmr->res.id;
}
static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
@@ -75,20 +68,22 @@ static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
}
}
-static void frwr_mr_recycle(struct rpcrdma_mr *mr)
+/**
+ * frwr_mr_release - Destroy one MR
+ * @mr: MR allocated by frwr_mr_init
+ *
+ */
+void frwr_mr_release(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
- trace_xprtrdma_mr_recycle(mr);
-
- frwr_mr_unmap(r_xprt, mr);
+ int rc;
- spin_lock(&r_xprt->rx_buf.rb_lock);
- list_del(&mr->mr_all);
- r_xprt->rx_stats.mrs_recycled++;
- spin_unlock(&r_xprt->rx_buf.rb_lock);
+ frwr_mr_unmap(mr->mr_xprt, mr);
- frwr_release_mr(mr);
+ rc = ib_dereg_mr(mr->mr_ibmr);
+ if (rc)
+ trace_xprtrdma_frwr_dereg(mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static void frwr_mr_put(struct rpcrdma_mr *mr)
@@ -144,10 +139,11 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
goto out_list_err;
mr->mr_xprt = r_xprt;
- mr->frwr.fr_mr = frmr;
+ mr->mr_ibmr = frmr;
mr->mr_device = NULL;
INIT_LIST_HEAD(&mr->mr_list);
- init_completion(&mr->frwr.fr_linv_done);
+ init_completion(&mr->mr_linv_done);
+ frwr_cid_init(ep, mr);
sg_init_table(sg, depth);
mr->mr_sg = sg;
@@ -257,6 +253,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+ ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;
ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
ep->re_max_rdma_segs =
@@ -326,7 +323,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
goto out_dmamap_err;
mr->mr_device = ep->re_id->device;
- ibmr = mr->frwr.fr_mr;
+ ibmr = mr->mr_ibmr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
if (n != dma_nents)
goto out_mapmr_err;
@@ -336,7 +333,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);
- reg_wr = &mr->frwr.fr_regwr;
+ reg_wr = &mr->mr_regwr;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
@@ -364,29 +361,19 @@ out_mapmr_err:
* @cq: completion queue
* @wc: WCE for a completed FastReg WR
*
+ * Each flushed MR gets destroyed after the QP has drained.
*/
static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
- /* The MR will get recycled when the associated req is retransmitted */
+ trace_xprtrdma_wc_fastreg(wc, &mr->mr_cid);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
-static void frwr_cid_init(struct rpcrdma_ep *ep,
- struct rpcrdma_frwr *frwr)
-{
- struct rpc_rdma_cid *cid = &frwr->fr_cid;
-
- cid->ci_queue_id = ep->re_attr.send_cq->res.id;
- cid->ci_completion_id = frwr->fr_mr->res.id;
-}
-
/**
* frwr_send - post Send WRs containing the RPC Call message
* @r_xprt: controlling transport instance
@@ -403,27 +390,36 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
*/
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
+ struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
- struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
+ unsigned int num_wrs;
- post_wr = &req->rl_wr;
+ num_wrs = 1;
+ post_wr = send_wr;
list_for_each_entry(mr, &req->rl_registered, mr_list) {
- struct rpcrdma_frwr *frwr;
-
- frwr = &mr->frwr;
-
- frwr->fr_cqe.done = frwr_wc_fastreg;
- frwr_cid_init(ep, frwr);
- frwr->fr_regwr.wr.next = post_wr;
- frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
- frwr->fr_regwr.wr.num_sge = 0;
- frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
- frwr->fr_regwr.wr.send_flags = 0;
+ trace_xprtrdma_mr_fastreg(mr);
+
+ mr->mr_cqe.done = frwr_wc_fastreg;
+ mr->mr_regwr.wr.next = post_wr;
+ mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
+ mr->mr_regwr.wr.num_sge = 0;
+ mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
+ mr->mr_regwr.wr.send_flags = 0;
+ post_wr = &mr->mr_regwr.wr;
+ ++num_wrs;
+ }
- post_wr = &frwr->fr_regwr.wr;
+ if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
+ send_wr->send_flags |= IB_SEND_SIGNALED;
+ ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
+ num_wrs - ep->re_send_count);
+ } else {
+ send_wr->send_flags &= ~IB_SEND_SIGNALED;
+ ep->re_send_count -= num_wrs;
}
+ trace_xprtrdma_post_send(req);
return ib_post_send(ep->re_id->qp, post_wr, NULL);
}
@@ -440,6 +436,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
+ trace_xprtrdma_mr_reminv(mr);
frwr_mr_put(mr);
break; /* only one invalidated MR per RPC */
}
@@ -447,9 +444,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
- if (wc->status != IB_WC_SUCCESS)
- frwr_mr_recycle(mr);
- else
+ if (likely(wc->status == IB_WC_SUCCESS))
frwr_mr_put(mr);
}
@@ -462,12 +457,10 @@ static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
+ trace_xprtrdma_wc_li(wc, &mr->mr_cid);
frwr_mr_done(wc, mr);
rpcrdma_flush_disconnect(cq->cq_context, wc);
@@ -483,14 +476,12 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
+ trace_xprtrdma_wc_li_wake(wc, &mr->mr_cid);
frwr_mr_done(wc, mr);
- complete(&frwr->fr_linv_done);
+ complete(&mr->mr_linv_done);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
@@ -511,7 +502,6 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
struct ib_send_wr *first, **prev, *last;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
int rc;
@@ -520,35 +510,34 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- frwr = NULL;
prev = &first;
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
- frwr = &mr->frwr;
- frwr->fr_cqe.done = frwr_wc_localinv;
- frwr_cid_init(ep, frwr);
- last = &frwr->fr_invwr;
+ last = &mr->mr_invwr;
last->next = NULL;
- last->wr_cqe = &frwr->fr_cqe;
+ last->wr_cqe = &mr->mr_cqe;
last->sg_list = NULL;
last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
+ last->wr_cqe->done = frwr_wc_localinv;
+
*prev = last;
prev = &last->next;
}
+ mr = container_of(last, struct rpcrdma_mr, mr_invwr);
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
* are complete.
*/
- frwr->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&frwr->fr_linv_done);
+ last->wr_cqe->done = frwr_wc_localinv_wake;
+ reinit_completion(&mr->mr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
@@ -562,22 +551,12 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* not happen, so don't wait in that case.
*/
if (bad_wr != first)
- wait_for_completion(&frwr->fr_linv_done);
+ wait_for_completion(&mr->mr_linv_done);
if (!rc)
return;
- /* Recycle MRs in the LOCAL_INV chain that did not get posted.
- */
+ /* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
- while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr,
- fr_invwr);
- mr = container_of(frwr, struct rpcrdma_mr, frwr);
- bad_wr = bad_wr->next;
-
- list_del_init(&mr->mr_list);
- frwr_mr_recycle(mr);
- }
}
/**
@@ -589,20 +568,24 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
- struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
+ struct rpcrdma_rep *rep;
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
- frwr_mr_done(wc, mr);
+ trace_xprtrdma_wc_li_done(wc, &mr->mr_cid);
- /* Ensure @rep is generated before frwr_mr_done */
+ /* Ensure that @rep is generated before the MR is released */
+ rep = mr->mr_req->rl_reply;
smp_rmb();
- rpcrdma_complete_rqst(rep);
- rpcrdma_flush_disconnect(cq->cq_context, wc);
+ if (wc->status != IB_WC_SUCCESS) {
+ if (rep)
+ rpcrdma_unpin_rqst(rep);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
+ return;
+ }
+ frwr_mr_put(mr);
+ rpcrdma_complete_rqst(rep);
}
/**
@@ -619,33 +602,29 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, *last, **prev;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
- const struct ib_send_wr *bad_wr;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
int rc;
/* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- frwr = NULL;
prev = &first;
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
- frwr = &mr->frwr;
- frwr->fr_cqe.done = frwr_wc_localinv;
- frwr_cid_init(ep, frwr);
- last = &frwr->fr_invwr;
+ last = &mr->mr_invwr;
last->next = NULL;
- last->wr_cqe = &frwr->fr_cqe;
+ last->wr_cqe = &mr->mr_cqe;
last->sg_list = NULL;
last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
+ last->wr_cqe->done = frwr_wc_localinv;
+
*prev = last;
prev = &last->next;
}
@@ -655,31 +634,23 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* are complete. The last completion will wake up the
* RPC waiter.
*/
- frwr->fr_cqe.done = frwr_wc_localinv_done;
+ last->wr_cqe->done = frwr_wc_localinv_done;
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless re_id->qp is a valid pointer.
*/
- bad_wr = NULL;
- rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
+ rc = ib_post_send(ep->re_id->qp, first, NULL);
if (!rc)
return;
- /* Recycle MRs in the LOCAL_INV chain that did not get posted.
- */
+ /* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
- while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
- mr = container_of(frwr, struct rpcrdma_mr, frwr);
- bad_wr = bad_wr->next;
-
- frwr_mr_recycle(mr);
- }
/* The final LOCAL_INV WR in the chain is supposed to
- * do the wake. If it was never posted, the wake will
- * not happen, so wake here in that case.
+ * do the wake. If it was never posted, the wake does
+ * not happen. Unpin the rqst in preparation for its
+ * retransmission.
*/
- rpcrdma_complete_rqst(req->rl_reply);
+ rpcrdma_unpin_rqst(req->rl_reply);
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 292f066d006e..649f7d8b9733 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1326,9 +1326,35 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
return -EIO;
}
-/* Perform XID lookup, reconstruction of the RPC reply, and
- * RPC completion while holding the transport lock to ensure
- * the rep, rqst, and rq_task pointers remain stable.
+/**
+ * rpcrdma_unpin_rqst - Release rqst without completing it
+ * @rep: RPC/RDMA Receive context
+ *
+ * This is done when a connection is lost so that a Reply
+ * can be dropped and its matching Call can be subsequently
+ * retransmitted on a new connection.
+ */
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
+{
+ struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
+ struct rpc_rqst *rqst = rep->rr_rqst;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+
+ req->rl_reply = NULL;
+ rep->rr_rqst = NULL;
+
+ spin_lock(&xprt->queue_lock);
+ xprt_unpin_rqst(rqst);
+ spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * rpcrdma_complete_rqst - Pass completed rqst back to RPC
+ * @rep: RPC/RDMA Receive context
+ *
+ * Reconstruct the RPC reply and complete the transaction
+ * while @rqst is still pinned to ensure the rep, rqst, and
+ * rq_task pointers remain stable.
*/
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
@@ -1430,13 +1456,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
+ rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
+ false);
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
- rpcrdma_post_recvs(r_xprt, false);
req = rpcr_to_rdmar(rqst);
if (unlikely(req->rl_reply))
- rpcrdma_recv_buffer_put(req->rl_reply);
+ rpcrdma_rep_put(buf, req->rl_reply);
req->rl_reply = rep;
rep->rr_rqst = rqst;
@@ -1464,5 +1491,5 @@ out_shortreply:
trace_xprtrdma_reply_short_err(rep);
out:
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_put(buf, rep);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 4a1edbb4028e..16897fcb659c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -93,7 +93,13 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
- return svc_rdma_send(rdma, sctxt);
+ ret = svc_rdma_send(rdma, sctxt);
+ if (ret < 0)
+ return ret;
+
+ ret = wait_for_completion_killable(&sctxt->sc_done);
+ svc_rdma_send_ctxt_put(rdma, sctxt);
+ return ret;
}
/* Server-side transport endpoint wants a whole page for its send
@@ -252,9 +258,9 @@ xprt_setup_rdma_bc(struct xprt_create *args)
xprt->timeout = &xprt_rdma_bc_timeout;
xprt_set_bound(xprt);
xprt_set_connected(xprt);
- xprt->bind_timeout = RPCRDMA_BIND_TO;
- xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
- xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+ xprt->bind_timeout = 0;
+ xprt->reestablish_timeout = 0;
+ xprt->idle_timeout = 0;
xprt->prot = XPRT_TRANSPORT_BC_RDMA;
xprt->ops = &xprt_rdma_bc_procs;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 6d28f23ceb35..6be23ce7a93d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -89,8 +89,7 @@
* svc_rdma_recvfrom call returns.
*
* During the second svc_rdma_recvfrom call, RDMA Read sink pages
- * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst
- * (see rdma_read_complete() below).
+ * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst.
*/
#include <linux/slab.h>
@@ -107,8 +106,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);
static inline struct svc_rdma_recv_ctxt *
@@ -230,11 +227,6 @@ out_empty:
void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
- unsigned int i;
-
- for (i = 0; i < ctxt->rc_page_count; i++)
- put_page(ctxt->rc_pages[i]);
-
pcl_free(&ctxt->rc_call_pcl);
pcl_free(&ctxt->rc_read_pcl);
pcl_free(&ctxt->rc_write_pcl);
@@ -274,6 +266,9 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
struct ib_recv_wr *recv_chain;
int ret;
+ if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+ return false;
+
recv_chain = NULL;
while (wanted--) {
ctxt = svc_rdma_recv_ctxt_get(rdma);
@@ -291,18 +286,17 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
if (ret)
- goto err_post;
+ goto err_free;
return true;
-err_post:
+err_free:
+ trace_svcrdma_rq_post_err(rdma, ret);
while (bad_wr) {
ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
rc_recv_wr);
bad_wr = bad_wr->next;
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
-
- trace_svcrdma_rq_post_err(rdma, ret);
/* Since we're destroying the xprt, no need to reset
* sc_pending_recvs. */
return false;
@@ -340,6 +334,19 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS)
goto flushed;
+ /* If receive posting fails, the connection is about to be
+ * lost anyway. The server will not be able to send a reply
+ * for this RPC, and the client will retransmit this RPC
+ * anyway when it reconnects.
+ *
+ * Therefore we drop the Receive, even if status was SUCCESS
+ * to reduce the likelihood of replayed requests once the
+ * client reconnects.
+ */
+ if (rdma->sc_pending_recvs < rdma->sc_max_requests)
+ if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
+ goto flushed;
+
/* All wc fields are now known to be valid */
ctxt->rc_byte_len = wc->byte_len;
@@ -350,20 +357,11 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt);
-
- if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) &&
- rdma->sc_pending_recvs < rdma->sc_max_requests)
- if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH,
- false))
- goto post_err;
-
return;
flushed:
svc_rdma_recv_ctxt_put(rdma, ctxt);
-post_err:
- set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_xprt_enqueue(&rdma->sc_xprt);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
@@ -375,10 +373,6 @@ void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
- while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
- list_del(&ctxt->rc_list);
- svc_rdma_recv_ctxt_put(rdma, ctxt);
- }
while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
list_del(&ctxt->rc_list);
svc_rdma_recv_ctxt_put(rdma, ctxt);
@@ -716,35 +710,6 @@ out_inval:
return -EINVAL;
}
-static void rdma_read_complete(struct svc_rqst *rqstp,
- struct svc_rdma_recv_ctxt *head)
-{
- int page_no;
-
- /* Move Read chunk pages to rqstp so that they will be released
- * when svc_process is done with them.
- */
- for (page_no = 0; page_no < head->rc_page_count; page_no++) {
- put_page(rqstp->rq_pages[page_no]);
- rqstp->rq_pages[page_no] = head->rc_pages[page_no];
- }
- head->rc_page_count = 0;
-
- /* Point rq_arg.pages past header */
- rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count];
- rqstp->rq_arg.page_len = head->rc_arg.page_len;
-
- /* rq_respages starts after the last arg page */
- rqstp->rq_respages = &rqstp->rq_pages[page_no];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
-
- /* Rebuild rq_arg head and tail. */
- rqstp->rq_arg.head[0] = head->rc_arg.head[0];
- rqstp->rq_arg.tail[0] = head->rc_arg.tail[0];
- rqstp->rq_arg.len = head->rc_arg.len;
- rqstp->rq_arg.buflen = head->rc_arg.buflen;
-}
-
static void svc_rdma_send_error(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *rctxt,
int status)
@@ -829,25 +794,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
+ ctxt = NULL;
spin_lock(&rdma_xprt->sc_rq_dto_lock);
- ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
- if (ctxt) {
- list_del(&ctxt->rc_list);
- spin_unlock(&rdma_xprt->sc_rq_dto_lock);
- rdma_read_complete(rqstp, ctxt);
- goto complete;
- }
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
- if (!ctxt) {
+ if (ctxt)
+ list_del(&ctxt->rc_list);
+ else
/* No new incoming requests, terminate the loop */
clear_bit(XPT_DATA, &xprt->xpt_flags);
- spin_unlock(&rdma_xprt->sc_rq_dto_lock);
- return 0;
- }
- list_del(&ctxt->rc_list);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
- percpu_counter_inc(&svcrdma_stat_recv);
+ /* Unblock the transport for the next receive */
+ svc_xprt_received(xprt);
+ if (!ctxt)
+ return 0;
+
+ percpu_counter_inc(&svcrdma_stat_recv);
ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
DMA_FROM_DEVICE);
@@ -872,21 +834,17 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
if (!pcl_is_empty(&ctxt->rc_read_pcl) ||
- !pcl_is_empty(&ctxt->rc_call_pcl))
- goto out_readlist;
+ !pcl_is_empty(&ctxt->rc_call_pcl)) {
+ ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
+ if (ret < 0)
+ goto out_readfail;
+ }
-complete:
rqstp->rq_xprt_ctxt = ctxt;
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
return rqstp->rq_arg.len;
-out_readlist:
- ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
- if (ret < 0)
- goto out_readfail;
- return 0;
-
out_err:
svc_rdma_send_error(rdma_xprt, ctxt, ret);
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 693d139a8633..5238bc829235 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -150,6 +150,8 @@ struct svc_rdma_chunk_ctxt {
struct svcxprt_rdma *cc_rdma;
struct list_head cc_rwctxts;
int cc_sqecount;
+ enum ib_wc_status cc_status;
+ struct completion cc_done;
};
static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma,
@@ -250,7 +252,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
wake_up(&rdma->sc_send_wait);
if (unlikely(wc->status != IB_WC_SUCCESS))
- set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
svc_rdma_write_info_free(info);
}
@@ -299,29 +301,15 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
struct svcxprt_rdma *rdma = cc->cc_rdma;
- struct svc_rdma_read_info *info =
- container_of(cc, struct svc_rdma_read_info, ri_cc);
trace_svcrdma_wc_read(wc, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
- set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
- } else {
- spin_lock(&rdma->sc_rq_dto_lock);
- list_add_tail(&info->ri_readctxt->rc_list,
- &rdma->sc_read_complete_q);
- /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
- set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
- spin_unlock(&rdma->sc_rq_dto_lock);
-
- svc_xprt_enqueue(&rdma->sc_xprt);
- }
-
- svc_rdma_read_info_free(info);
+ cc->cc_status = wc->status;
+ complete(&cc->cc_done);
+ return;
}
/* This function sleeps when the transport's Send Queue is congested.
@@ -334,7 +322,6 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
{
struct svcxprt_rdma *rdma = cc->cc_rdma;
- struct svc_xprt *xprt = &rdma->sc_xprt;
struct ib_send_wr *first_wr;
const struct ib_send_wr *bad_wr;
struct list_head *tmp;
@@ -373,7 +360,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
} while (1);
trace_svcrdma_sq_post_err(rdma, ret);
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
/* If even one was posted, there will be a completion. */
if (bad_wr != first_wr)
@@ -677,8 +664,8 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
struct svc_rqst *rqstp = info->ri_rqst;
- struct svc_rdma_rw_ctxt *ctxt;
unsigned int sge_no, seg_len, len;
+ struct svc_rdma_rw_ctxt *ctxt;
struct scatterlist *sg;
int ret;
@@ -694,8 +681,6 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
seg_len = min_t(unsigned int, len,
PAGE_SIZE - info->ri_pageoff);
- head->rc_arg.pages[info->ri_pageno] =
- rqstp->rq_pages[info->ri_pageno];
if (!info->ri_pageoff)
head->rc_page_count++;
@@ -789,12 +774,10 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
page_len = min_t(unsigned int, remaining,
PAGE_SIZE - info->ri_pageoff);
- head->rc_arg.pages[info->ri_pageno] =
- rqstp->rq_pages[info->ri_pageno];
if (!info->ri_pageoff)
head->rc_page_count++;
- dst = page_address(head->rc_arg.pages[info->ri_pageno]);
+ dst = page_address(rqstp->rq_pages[info->ri_pageno]);
memcpy(dst + info->ri_pageno, src + offset, page_len);
info->ri_totalbytes += page_len;
@@ -814,7 +797,7 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
* svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks
* @info: context for RDMA Reads
*
- * The chunk data lands in head->rc_arg as a series of contiguous pages,
+ * The chunk data lands in rqstp->rq_arg as a series of contiguous pages,
* like an incoming TCP call.
*
* Return values:
@@ -828,8 +811,8 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
{
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
+ struct xdr_buf *buf = &info->ri_rqst->rq_arg;
struct svc_rdma_chunk *chunk, *next;
- struct xdr_buf *buf = &head->rc_arg;
unsigned int start, length;
int ret;
@@ -865,9 +848,9 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
buf->len += info->ri_totalbytes;
buf->buflen += info->ri_totalbytes;
- head->rc_hdr_count = 1;
- buf->head[0].iov_base = page_address(head->rc_pages[0]);
+ buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
+ buf->pages = &info->ri_rqst->rq_pages[1];
buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
return 0;
}
@@ -876,9 +859,9 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
* svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks
* @info: context for RDMA Reads
*
- * The chunk data lands in the page list of head->rc_arg.pages.
+ * The chunk data lands in the page list of rqstp->rq_arg.pages.
*
- * Currently NFSD does not look at the head->rc_arg.tail[0] iovec.
+ * Currently NFSD does not look at the rqstp->rq_arg.tail[0] kvec.
* Therefore, XDR round-up of the Read chunk and trailing
* inline content must both be added at the end of the pagelist.
*
@@ -892,7 +875,7 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
{
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
- struct xdr_buf *buf = &head->rc_arg;
+ struct xdr_buf *buf = &info->ri_rqst->rq_arg;
struct svc_rdma_chunk *chunk;
unsigned int length;
int ret;
@@ -902,8 +885,6 @@ static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
if (ret < 0)
goto out;
- head->rc_hdr_count = 0;
-
/* Split the Receive buffer between the head and tail
* buffers at Read chunk's position. XDR roundup of the
* chunk is not included in either the pagelist or in
@@ -922,7 +903,8 @@ static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
* Currently these chunks always start at page offset 0,
* thus the rounded-up length never crosses a page boundary.
*/
- length = XDR_QUADLEN(info->ri_totalbytes) << 2;
+ buf->pages = &info->ri_rqst->rq_pages[0];
+ length = xdr_align_size(chunk->ch_length);
buf->page_len = length;
buf->len += length;
buf->buflen += length;
@@ -1034,8 +1016,7 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
* @info: context for RDMA Reads
*
* The start of the data lands in the first page just after the
- * Transport header, and the rest lands in the page list of
- * head->rc_arg.pages.
+ * Transport header, and the rest lands in rqstp->rq_arg.pages.
*
* Assumptions:
* - A PZRC is never sent in an RDMA_MSG message, though it's
@@ -1050,8 +1031,7 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
*/
static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
- struct xdr_buf *buf = &head->rc_arg;
+ struct xdr_buf *buf = &info->ri_rqst->rq_arg;
int ret;
ret = svc_rdma_read_call_chunk(info);
@@ -1061,35 +1041,15 @@ static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info)
buf->len += info->ri_totalbytes;
buf->buflen += info->ri_totalbytes;
- head->rc_hdr_count = 1;
- buf->head[0].iov_base = page_address(head->rc_pages[0]);
+ buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
+ buf->pages = &info->ri_rqst->rq_pages[1];
buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
out:
return ret;
}
-/* Pages under I/O have been copied to head->rc_pages. Ensure they
- * are not released by svc_xprt_release() until the I/O is complete.
- *
- * This has to be done after all Read WRs are constructed to properly
- * handle a page that is part of I/O on behalf of two different RDMA
- * segments.
- *
- * Do this only if I/O has been posted. Otherwise, we do indeed want
- * svc_xprt_release() to clean things up properly.
- */
-static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
- const unsigned int start,
- const unsigned int num_pages)
-{
- unsigned int i;
-
- for (i = start; i < num_pages + start; i++)
- rqstp->rq_pages[i] = NULL;
-}
-
/**
* svc_rdma_process_read_list - Pull list of Read chunks from the client
* @rdma: controlling RDMA transport
@@ -1121,18 +1081,6 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc;
int ret;
- /* The request (with page list) is constructed in
- * head->rc_arg. Pages involved with RDMA Read I/O are
- * transferred there.
- */
- head->rc_arg.head[0] = rqstp->rq_arg.head[0];
- head->rc_arg.tail[0] = rqstp->rq_arg.tail[0];
- head->rc_arg.pages = head->rc_pages;
- head->rc_arg.page_base = 0;
- head->rc_arg.page_len = 0;
- head->rc_arg.len = rqstp->rq_arg.len;
- head->rc_arg.buflen = rqstp->rq_arg.buflen;
-
info = svc_rdma_read_info_alloc(rdma);
if (!info)
return -ENOMEM;
@@ -1154,11 +1102,22 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
goto out_err;
trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount);
+ init_completion(&cc->cc_done);
ret = svc_rdma_post_chunk_ctxt(cc);
if (ret < 0)
goto out_err;
- svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count);
- return 1;
+
+ ret = 1;
+ wait_for_completion(&cc->cc_done);
+ if (cc->cc_status != IB_WC_SUCCESS)
+ ret = -EIO;
+
+ /* rq_respages starts after the last arg page */
+ rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+ /* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */
+ head->rc_page_count = 0;
out_err:
svc_rdma_read_info_free(info);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 52c759a8543e..d6bbafb773e1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -111,8 +111,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
static inline struct svc_rdma_send_ctxt *
@@ -157,6 +155,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
+ init_completion(&ctxt->sc_done);
ctxt->sc_cqe.done = svc_rdma_wc_send;
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
@@ -220,7 +219,6 @@ out:
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
- ctxt->sc_page_count = 0;
return ctxt;
out_empty:
@@ -235,8 +233,6 @@ out_empty:
* svc_rdma_send_ctxt_put - Return send_ctxt to free list
* @rdma: controlling svcxprt_rdma
* @ctxt: object to return to the free list
- *
- * Pages left in sc_pages are DMA unmapped and released.
*/
void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
@@ -257,9 +253,6 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
ctxt->sc_sges[i].length);
}
- for (i = 0; i < ctxt->sc_page_count; ++i)
- put_page(ctxt->sc_pages[i]);
-
spin_lock(&rdma->sc_send_lock);
list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
spin_unlock(&rdma->sc_send_lock);
@@ -282,15 +275,13 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ complete(&ctxt->sc_done);
+
atomic_inc(&rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
- svc_rdma_send_ctxt_put(rdma, ctxt);
-
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
- set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_xprt_enqueue(&rdma->sc_xprt);
- }
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
@@ -298,7 +289,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
* @rdma: transport on which to post the WR
* @ctxt: send ctxt with a Send WR ready to post
*
- * Returns zero the Send WR was posted successfully. Otherwise, a
+ * Returns zero if the Send WR was posted successfully. Otherwise, a
* negative errno is returned.
*/
int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
@@ -306,7 +297,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
struct ib_send_wr *wr = &ctxt->sc_send_wr;
int ret;
- might_sleep();
+ reinit_completion(&ctxt->sc_done);
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
@@ -336,7 +327,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
}
trace_svcrdma_sq_post_err(rdma, ret);
- set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
return ret;
}
@@ -795,25 +786,6 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
svc_rdma_xb_dma_map, &args);
}
-/* The svc_rqst and all resources it owns are released as soon as
- * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
- * so they are released by the Send completion handler.
- */
-static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
- struct svc_rdma_send_ctxt *ctxt)
-{
- int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
-
- ctxt->sc_page_count += pages;
- for (i = 0; i < pages; i++) {
- ctxt->sc_pages[i] = rqstp->rq_respages[i];
- rqstp->rq_respages[i] = NULL;
- }
-
- /* Prevent svc_xprt_release from releasing pages in rq_pages */
- rqstp->rq_next_page = rqstp->rq_respages;
-}
-
/* Prepare the portion of the RPC Reply that will be transmitted
* via RDMA Send. The RPC-over-RDMA transport header is prepared
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
@@ -843,15 +815,20 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
- svc_rdma_save_io_pages(rqstp, sctxt);
-
if (rctxt->rc_inv_rkey) {
sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else {
sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
- return svc_rdma_send(rdma, sctxt);
+
+ ret = svc_rdma_send(rdma, sctxt);
+ if (ret < 0)
+ return ret;
+
+ ret = wait_for_completion_killable(&sctxt->sc_done);
+ svc_rdma_send_ctxt_put(rdma, sctxt);
+ return ret;
}
/**
@@ -917,7 +894,8 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
if (svc_rdma_send(rdma, sctxt))
goto put_ctxt;
- return;
+
+ wait_for_completion_killable(&sctxt->sc_done);
put_ctxt:
svc_rdma_send_ctxt_put(rdma, sctxt);
@@ -943,60 +921,67 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
__be32 *rdma_argp = rctxt->rc_recv_buf;
struct svc_rdma_send_ctxt *sctxt;
+ unsigned int rc_size;
__be32 *p;
int ret;
ret = -ENOTCONN;
if (svc_xprt_is_dead(xprt))
- goto err0;
+ goto drop_connection;
ret = -ENOMEM;
sctxt = svc_rdma_send_ctxt_get(rdma);
if (!sctxt)
- goto err0;
+ goto drop_connection;
+ ret = -EMSGSIZE;
p = xdr_reserve_space(&sctxt->sc_stream,
rpcrdma_fixed_maxsz * sizeof(*p));
if (!p)
- goto err0;
+ goto put_ctxt;
ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
if (ret < 0)
- goto err2;
+ goto reply_chunk;
+ rc_size = ret;
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
*p++ = rdma->sc_fc_credits;
*p = pcl_is_empty(&rctxt->rc_reply_pcl) ? rdma_msg : rdma_nomsg;
- if (svc_rdma_encode_read_list(sctxt) < 0)
- goto err0;
- if (svc_rdma_encode_write_list(rctxt, sctxt) < 0)
- goto err0;
- if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0)
- goto err0;
+ ret = svc_rdma_encode_read_list(sctxt);
+ if (ret < 0)
+ goto put_ctxt;
+ ret = svc_rdma_encode_write_list(rctxt, sctxt);
+ if (ret < 0)
+ goto put_ctxt;
+ ret = svc_rdma_encode_reply_chunk(rctxt, sctxt, rc_size);
+ if (ret < 0)
+ goto put_ctxt;
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
- goto err1;
+ goto put_ctxt;
+
+ /* Prevent svc_xprt_release() from releasing the page backing
+ * rq_res.head[0].iov_base. It's no longer being accessed by
+ * the I/O device. */
+ rqstp->rq_respages++;
return 0;
- err2:
+reply_chunk:
if (ret != -E2BIG && ret != -EINVAL)
- goto err1;
+ goto put_ctxt;
- /* Send completion releases payload pages that were part
- * of previously posted RDMA Writes.
- */
- svc_rdma_save_io_pages(rqstp, sctxt);
svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
return 0;
- err1:
+put_ctxt:
svc_rdma_send_ctxt_put(rdma, sctxt);
- err0:
+drop_connection:
trace_svcrdma_send_err(rqstp, ret);
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
return -ENOTCONN;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c895f80df659..d94b7759ada1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -119,8 +119,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
case IB_EVENT_QP_ACCESS_ERR:
case IB_EVENT_DEVICE_FATAL:
default:
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
+ svc_xprt_deferred_close(xprt);
break;
}
}
@@ -137,7 +136,6 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
- INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
@@ -279,12 +277,14 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
switch (event->event) {
case RDMA_CM_EVENT_ESTABLISHED:
clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
+
+ /* Handle any requests that were received while
+ * CONN_PENDING was set. */
svc_xprt_enqueue(xprt);
break;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
+ svc_xprt_deferred_close(xprt);
break;
default:
break;
@@ -404,11 +404,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
- rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests;
+ newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
+ rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
+ newxprt->sc_recv_batch;
if (rq_depth > dev->attrs.max_qp_wr) {
pr_warn("svcrdma: reducing receive depth to %d\n",
dev->attrs.max_qp_wr);
rq_depth = dev->attrs.max_qp_wr;
+ newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 78d29d1bcc20..09953597d055 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -262,8 +262,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
* xprt_rdma_inject_disconnect - inject a connection fault
* @xprt: transport context
*
- * If @xprt is connected, disconnect it to simulate spurious connection
- * loss.
+ * If @xprt is connected, disconnect it to simulate spurious
+ * connection loss. Caller must hold @xprt's send lock to
+ * ensure that data structures and hardware resources are
+ * stable during the rdma_disconnect() call.
*/
static void
xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index ec912cf9c618..1e965a380896 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -101,6 +101,12 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct rdma_cm_id *id = ep->re_id;
+ /* Wait for rpcrdma_post_recvs() to leave its critical
+ * section.
+ */
+ if (atomic_inc_return(&ep->re_receiving) > 1)
+ wait_for_completion(&ep->re_done);
+
/* Flush Receives, then wait for deferred Reply work
* to complete.
*/
@@ -114,22 +120,6 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
rpcrdma_ep_put(ep);
}
-/**
- * rpcrdma_qp_event_handler - Handle one QP event (error notification)
- * @event: details of the event
- * @context: ep that owns QP where event occurred
- *
- * Called from the RDMA provider (device driver) possibly in an interrupt
- * context. The QP is always destroyed before the ID, so the ID will be
- * reliably available when this handler is invoked.
- */
-static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
-{
- struct rpcrdma_ep *ep = context;
-
- trace_xprtrdma_qp_event(ep, event);
-}
-
/* Ensure xprt_force_disconnect() is invoked exactly once when a
* connection is closed or lost. (The important thing is it needs
* to be invoked "at least" once).
@@ -205,7 +195,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
out_flushed:
rpcrdma_flush_disconnect(r_xprt, wc);
- rpcrdma_rep_destroy(rep);
+ rpcrdma_rep_put(&r_xprt->rx_buf, rep);
}
static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
@@ -414,6 +404,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
__module_get(THIS_MODULE);
device = id->device;
ep->re_id = id;
+ reinit_completion(&ep->re_done);
ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
ep->re_inline_send = xprt_rdma_max_inline_write;
@@ -424,8 +415,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests);
- ep->re_attr.event_handler = rpcrdma_qp_event_handler;
- ep->re_attr.qp_context = ep;
ep->re_attr.srq = NULL;
ep->re_attr.cap.max_inline_data = 0;
ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -535,7 +524,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
- rpcrdma_post_recvs(r_xprt, true);
+ rpcrdma_post_recvs(r_xprt, 1, true);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
@@ -954,13 +943,11 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
rpcrdma_req_reset(req);
}
-/* No locking needed here. This function is called only by the
- * Receive completion handler.
- */
static noinline
struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
bool temp)
{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
@@ -987,7 +974,10 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
rep->rr_temp = temp;
- list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps);
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_all, &buf->rb_all_reps);
+ spin_unlock(&buf->rb_lock);
return rep;
out_free_regbuf:
@@ -998,16 +988,23 @@ out:
return NULL;
}
-/* No locking needed here. This function is invoked only by the
- * Receive completion handler, or during transport shutdown.
- */
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
{
- list_del(&rep->rr_all);
rpcrdma_regbuf_free(rep->rr_rdmabuf);
kfree(rep);
}
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_free(rep);
+}
+
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
@@ -1019,12 +1016,21 @@ static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
return llist_entry(node, struct rpcrdma_rep, rr_node);
}
-static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
- struct rpcrdma_rep *rep)
+/**
+ * rpcrdma_rep_put - Release rpcrdma_rep back to free list
+ * @buf: buffer pool
+ * @rep: rep to release
+ *
+ */
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep)
{
llist_add(&rep->rr_node, &buf->rb_free_reps);
}
+/* Caller must ensure the QP is quiescent (RQ is drained) before
+ * invoking this function, to guarantee rb_all_reps is not
+ * changing.
+ */
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1032,7 +1038,7 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
- rep->rr_temp = true;
+ rep->rr_temp = true; /* Mark this rep for destruction */
}
}
@@ -1040,8 +1046,18 @@ static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_rep *rep;
- while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
- rpcrdma_rep_destroy(rep);
+ spin_lock(&buf->rb_lock);
+ while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
+ struct rpcrdma_rep,
+ rr_all)) != NULL) {
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_free(rep);
+
+ spin_lock(&buf->rb_lock);
+ }
+ spin_unlock(&buf->rb_lock);
}
/**
@@ -1104,7 +1120,7 @@ void rpcrdma_req_destroy(struct rpcrdma_req *req)
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
- frwr_release_mr(mr);
+ frwr_mr_release(mr);
}
rpcrdma_regbuf_free(req->rl_recvbuf);
@@ -1135,7 +1151,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
- frwr_release_mr(mr);
+ frwr_mr_release(mr);
spin_lock(&buf->rb_lock);
}
@@ -1221,17 +1237,6 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
spin_unlock(&buffers->rb_lock);
}
-/**
- * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
- * @rep: rep to release
- *
- * Used after error conditions.
- */
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
-{
- rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
-}
-
/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
@@ -1342,21 +1347,7 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
*/
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
- struct ib_send_wr *send_wr = &req->rl_wr;
- struct rpcrdma_ep *ep = r_xprt->rx_ep;
- int rc;
-
- if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) {
- send_wr->send_flags |= IB_SEND_SIGNALED;
- ep->re_send_count = ep->re_send_batch;
- } else {
- send_wr->send_flags &= ~IB_SEND_SIGNALED;
- --ep->re_send_count;
- }
-
- trace_xprtrdma_post_send(req);
- rc = frwr_send(r_xprt, req);
- if (rc)
+ if (frwr_send(r_xprt, req))
return -ENOTCONN;
return 0;
}
@@ -1364,27 +1355,30 @@ int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/**
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
- * @temp: mark Receive buffers to be deleted after use
+ * @needed: current credit grant
+ * @temp: mark Receive buffers to be deleted after one use
*
*/
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_recv_wr *wr, *bad_wr;
struct rpcrdma_rep *rep;
- int needed, count, rc;
+ int count, rc;
rc = 0;
count = 0;
- needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
if (likely(ep->re_receive_count > needed))
goto out;
needed -= ep->re_receive_count;
if (!temp)
needed += RPCRDMA_MAX_RECV_BATCH;
+ if (atomic_inc_return(&ep->re_receiving) > 1)
+ goto out;
+
/* fast path: all needed reps can be found on the free list */
wr = NULL;
while (needed) {
@@ -1410,6 +1404,9 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
rc = ib_post_recv(ep->re_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
+ if (atomic_dec_return(&ep->re_receiving) > 0)
+ complete(&ep->re_done);
+
out:
trace_xprtrdma_post_recvs(r_xprt, count, rc);
if (rc) {
@@ -1418,7 +1415,7 @@ out:
rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
wr = wr->next;
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_put(buf, rep);
--count;
}
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index fe3be985e239..436ad7312614 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -83,6 +83,7 @@ struct rpcrdma_ep {
unsigned int re_max_inline_recv;
int re_async_rc;
int re_connect_status;
+ atomic_t re_receiving;
atomic_t re_force_disconnect;
struct ib_qp_init_attr re_attr;
wait_queue_head_t re_connect_wait;
@@ -228,31 +229,28 @@ struct rpcrdma_sendctx {
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*/
-struct rpcrdma_frwr {
- struct ib_mr *fr_mr;
- struct ib_cqe fr_cqe;
- struct rpc_rdma_cid fr_cid;
- struct completion fr_linv_done;
- union {
- struct ib_reg_wr fr_regwr;
- struct ib_send_wr fr_invwr;
- };
-};
-
struct rpcrdma_req;
struct rpcrdma_mr {
struct list_head mr_list;
struct rpcrdma_req *mr_req;
+
+ struct ib_mr *mr_ibmr;
struct ib_device *mr_device;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
- struct rpcrdma_frwr frwr;
+ struct ib_cqe mr_cqe;
+ struct completion mr_linv_done;
+ union {
+ struct ib_reg_wr mr_regwr;
+ struct ib_send_wr mr_invwr;
+ };
struct rpcrdma_xprt *mr_xprt;
u32 mr_handle;
u32 mr_length;
u64 mr_offset;
struct list_head mr_all;
+ struct rpc_rdma_cid mr_cid;
};
/*
@@ -461,7 +459,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
@@ -480,7 +478,7 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
struct rpcrdma_req *req);
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
gfp_t flags);
@@ -527,7 +525,7 @@ rpcrdma_data_dir(bool writing)
void frwr_reset(struct rpcrdma_req *req);
int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device);
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr);
-void frwr_release_mr(struct rpcrdma_mr *mr);
+void frwr_mr_release(struct rpcrdma_mr *mr);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, __be32 xid,
@@ -560,6 +558,7 @@ int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep);
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e35760f238a4..47aa47a2b07c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -558,6 +558,10 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
struct rpc_rqst *req;
ssize_t ret;
+ /* Is this transport associated with the backchannel? */
+ if (!xprt->bc_serv)
+ return -ESHUTDOWN;
+
/* Look up and lock the request corresponding to the given XID */
req = xprt_lookup_bc_request(xprt, transport->recv.xid);
if (!req) {
@@ -1018,6 +1022,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
req->rq_xtime = ktime_get();
+ tcp_sock_set_cork(transport->inet, true);
while (1) {
status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent);
@@ -1032,6 +1037,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
if (likely(req->rq_bytes_sent >= msglen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
transport->xmit.offset = 0;
+ if (atomic_long_read(&xprt->xmit_queuelen) == 1)
+ tcp_sock_set_cork(transport->inet, false);
return 0;
}
@@ -2163,6 +2170,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
}
xs_tcp_set_socket_timeouts(xprt, sock);
+ tcp_sock_set_nodelay(sk);
write_lock_bh(&sk->sk_callback_lock);
@@ -2177,7 +2185,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
- tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
xprt_clear_connected(xprt);