Following the *BSD code, we implement separate rtt timers for GETATTR,
LOOKUP, READ/READDIR/READLINK, and WRITE. In addition to this, there
is one extra timer for the COMMIT operation.
All the remaining RPC calls use the current system in which a fixed
timeout value gets set by the 'timeo' mount option.
In case of a timeout, the current exponential backoff algoritm is
implemented. Subsequent patches will improve this...
Cheers,
Trond
diff -u --recursive --new-file linux-2.5.25/fs/lockd/xdr.c linux-2.5.25-rtt1/fs/lockd/xdr.c
--- linux-2.5.25/fs/lockd/xdr.c Mon Feb 25 19:35:33 2002
+++ linux-2.5.25-rtt1/fs/lockd/xdr.c Tue Jul 16 11:43:08 2002
@@ -561,11 +561,10 @@
#define nlmclt_decode_norep NULL
#define PROC(proc, argtype, restype) \
- { "nlm_" #proc, \
- (kxdrproc_t) nlmclt_encode_##argtype, \
- (kxdrproc_t) nlmclt_decode_##restype, \
- MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \
- 0 \
+ { .p_procname = "nlm_" #proc, \
+ .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
+ .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
+ .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \
}
static struct rpc_procinfo nlm_procedures[] = {
diff -u --recursive --new-file linux-2.5.25/fs/lockd/xdr4.c linux-2.5.25-rtt1/fs/lockd/xdr4.c
--- linux-2.5.25/fs/lockd/xdr4.c Mon Feb 25 19:35:33 2002
+++ linux-2.5.25-rtt1/fs/lockd/xdr4.c Tue Jul 16 11:43:08 2002
@@ -566,12 +566,11 @@
*/
#define nlm4clt_decode_norep NULL
-#define PROC(proc, argtype, restype) \
- { "nlm4_" #proc, \
- (kxdrproc_t) nlm4clt_encode_##argtype, \
- (kxdrproc_t) nlm4clt_decode_##restype, \
- MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \
- 0 \
+#define PROC(proc, argtype, restype) \
+ { .p_procname = "nlm4_" #proc, \
+ .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
+ .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
+ .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \
}
static struct rpc_procinfo nlm4_procedures[] = {
diff -u --recursive --new-file linux-2.5.25/fs/nfs/nfs2xdr.c linux-2.5.25-rtt1/fs/nfs/nfs2xdr.c
--- linux-2.5.25/fs/nfs/nfs2xdr.c Sat May 25 21:34:59 2002
+++ linux-2.5.25-rtt1/fs/nfs/nfs2xdr.c Tue Jul 16 11:43:08 2002
@@ -671,33 +671,32 @@
# define MAX(a, b) (((a) > (b))? (a) : (b))
#endif
-#define PROC(proc, argtype, restype) \
- { "nfs_" #proc, \
- (kxdrproc_t) nfs_xdr_##argtype, \
- (kxdrproc_t) nfs_xdr_##restype, \
- MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
- 0 \
+#define PROC(proc, argtype, restype, timer) \
+ { .p_procname = "nfs_" #proc, \
+ .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
+ .p_decode = (kxdrproc_t) nfs_xdr_##restype, \
+ .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
+ .p_timer = timer \
}
-
static struct rpc_procinfo nfs_procedures[18] = {
- PROC(null, enc_void, dec_void),
- PROC(getattr, fhandle, attrstat),
- PROC(setattr, sattrargs, attrstat),
- PROC(root, enc_void, dec_void),
- PROC(lookup, diropargs, diropres),
- PROC(readlink, readlinkargs, readlinkres),
- PROC(read, readargs, readres),
- PROC(writecache, enc_void, dec_void),
- PROC(write, writeargs, writeres),
- PROC(create, createargs, diropres),
- PROC(remove, diropargs, stat),
- PROC(rename, renameargs, stat),
- PROC(link, linkargs, stat),
- PROC(symlink, symlinkargs, stat),
- PROC(mkdir, createargs, diropres),
- PROC(rmdir, diropargs, stat),
- PROC(readdir, readdirargs, readdirres),
- PROC(statfs, fhandle, statfsres),
+ PROC(null, enc_void, dec_void, 0),
+ PROC(getattr, fhandle, attrstat, 1),
+ PROC(setattr, sattrargs, attrstat, 0),
+ PROC(root, enc_void, dec_void, 0),
+ PROC(lookup, diropargs, diropres, 2),
+ PROC(readlink, readlinkargs, readlinkres, 3),
+ PROC(read, readargs, readres, 3),
+ PROC(writecache, enc_void, dec_void, 0),
+ PROC(write, writeargs, writeres, 4),
+ PROC(create, createargs, diropres, 0),
+ PROC(remove, diropargs, stat, 0),
+ PROC(rename, renameargs, stat, 0),
+ PROC(link, linkargs, stat, 0),
+ PROC(symlink, symlinkargs, stat, 0),
+ PROC(mkdir, createargs, diropres, 0),
+ PROC(rmdir, diropargs, stat, 0),
+ PROC(readdir, readdirargs, readdirres, 3),
+ PROC(statfs, fhandle, statfsres, 0),
};
struct rpc_version nfs_version2 = {
diff -u --recursive --new-file linux-2.5.25/fs/nfs/nfs3xdr.c linux-2.5.25-rtt1/fs/nfs/nfs3xdr.c
--- linux-2.5.25/fs/nfs/nfs3xdr.c Sat May 25 21:25:56 2002
+++ linux-2.5.25-rtt1/fs/nfs/nfs3xdr.c Tue Jul 16 11:43:08 2002
@@ -988,37 +988,37 @@
# define MAX(a, b) (((a) > (b))? (a) : (b))
#endif
-#define PROC(proc, argtype, restype) \
- { "nfs3_" #proc, \
- (kxdrproc_t) nfs3_xdr_##argtype, \
- (kxdrproc_t) nfs3_xdr_##restype, \
- MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
- 0 \
+#define PROC(proc, argtype, restype, timer) \
+ { .p_procname = "nfs3_" #proc, \
+ .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
+ .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
+ .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
+ .p_timer = timer \
}
static struct rpc_procinfo nfs3_procedures[22] = {
- PROC(null, enc_void, dec_void),
- PROC(getattr, fhandle, attrstat),
- PROC(setattr, sattrargs, wccstat),
- PROC(lookup, diropargs, lookupres),
- PROC(access, accessargs, accessres),
- PROC(readlink, readlinkargs, readlinkres),
- PROC(read, readargs, readres),
- PROC(write, writeargs, writeres),
- PROC(create, createargs, createres),
- PROC(mkdir, mkdirargs, createres),
- PROC(symlink, symlinkargs, createres),
- PROC(mknod, mknodargs, createres),
- PROC(remove, diropargs, wccstat),
- PROC(rmdir, diropargs, wccstat),
- PROC(rename, renameargs, renameres),
- PROC(link, linkargs, linkres),
- PROC(readdir, readdirargs, readdirres),
- PROC(readdirplus, readdirargs, readdirres),
- PROC(fsstat, fhandle, fsstatres),
- PROC(fsinfo, fhandle, fsinfores),
- PROC(pathconf, fhandle, pathconfres),
- PROC(commit, commitargs, commitres),
+ PROC(null, enc_void, dec_void, 0),
+ PROC(getattr, fhandle, attrstat, 1),
+ PROC(setattr, sattrargs, wccstat, 0),
+ PROC(lookup, diropargs, lookupres, 2),
+ PROC(access, accessargs, accessres, 1),
+ PROC(readlink, readlinkargs, readlinkres, 3),
+ PROC(read, readargs, readres, 3),
+ PROC(write, writeargs, writeres, 4),
+ PROC(create, createargs, createres, 0),
+ PROC(mkdir, mkdirargs, createres, 0),
+ PROC(symlink, symlinkargs, createres, 0),
+ PROC(mknod, mknodargs, createres, 0),
+ PROC(remove, diropargs, wccstat, 0),
+ PROC(rmdir, diropargs, wccstat, 0),
+ PROC(rename, renameargs, renameres, 0),
+ PROC(link, linkargs, linkres, 0),
+ PROC(readdir, readdirargs, readdirres, 3),
+ PROC(readdirplus, readdirargs, readdirres, 3),
+ PROC(fsstat, fhandle, fsstatres, 0),
+ PROC(fsinfo, fhandle, fsinfores, 0),
+ PROC(pathconf, fhandle, pathconfres, 0),
+ PROC(commit, commitargs, commitres, 5),
};
struct rpc_version nfs_version3 = {
diff -u --recursive --new-file linux-2.5.25/include/linux/sunrpc/clnt.h linux-2.5.25-rtt1/include/linux/sunrpc/clnt.h
--- linux-2.5.25/include/linux/sunrpc/clnt.h Tue Feb 5 16:23:43 2002
+++ linux-2.5.25-rtt1/include/linux/sunrpc/clnt.h Tue Jul 16 11:44:28 2002
@@ -15,6 +15,7 @@
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/timer.h>
#include <asm/signal.h>
/*
@@ -52,6 +53,8 @@
unsigned int cl_flags; /* misc client flags */
unsigned long cl_hardmax; /* max hard timeout */
+ struct rpc_rtt cl_rtt; /* RTO estimator data */
+
struct rpc_portmap cl_pmap; /* port mapping */
struct rpc_wait_queue cl_bindwait; /* waiting on getport() */
@@ -91,6 +94,7 @@
kxdrproc_t p_decode; /* XDR decode function */
unsigned int p_bufsiz; /* req. buffer size */
unsigned int p_count; /* call count */
+ unsigned int p_timer; /* Which RTT timer to use */
};
#define rpcproc_bufsiz(clnt, proc) ((clnt)->cl_procinfo[proc].p_bufsiz)
@@ -98,6 +102,7 @@
#define rpcproc_decode(clnt, proc) ((clnt)->cl_procinfo[proc].p_decode)
#define rpcproc_name(clnt, proc) ((clnt)->cl_procinfo[proc].p_procname)
#define rpcproc_count(clnt, proc) ((clnt)->cl_procinfo[proc].p_count)
+#define rpcproc_timer(clnt, proc) ((clnt)->cl_procinfo[proc].p_timer)
#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt))
#define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr)
diff -u --recursive --new-file linux-2.5.25/include/linux/sunrpc/timer.h linux-2.5.25-rtt1/include/linux/sunrpc/timer.h
--- linux-2.5.25/include/linux/sunrpc/timer.h Thu Jan 1 01:00:00 1970
+++ linux-2.5.25-rtt1/include/linux/sunrpc/timer.h Tue Jul 16 11:43:08 2002
@@ -0,0 +1,23 @@
+/*
+ * linux/include/linux/sunrpc/timer.h
+ *
+ * Declarations for the RPC transport timer.
+ *
+ * Copyright (C) 2002 Trond Myklebust <trond.myklebust@fys.uio.no>
+ */
+
+#ifndef _LINUX_SUNRPC_TIMER_H
+#define _LINUX_SUNRPC_TIMER_H
+
+struct rpc_rtt {
+ long timeo; /* default timeout value */
+ long srtt[5]; /* smoothed round trip time << 3 */
+ long sdrtt[5]; /* soothed medium deviation of RTT */
+};
+
+
+extern void rpc_init_rtt(struct rpc_rtt *rt, long timeo);
+extern void rpc_update_rtt(struct rpc_rtt *rt, int timer, long m);
+extern long rpc_calc_rto(struct rpc_rtt *rt, int timer);
+
+#endif /* _LINUX_SUNRPC_TIMER_H */
diff -u --recursive --new-file linux-2.5.25/include/linux/sunrpc/xprt.h linux-2.5.25-rtt1/include/linux/sunrpc/xprt.h
--- linux-2.5.25/include/linux/sunrpc/xprt.h Sun Jul 7 19:43:16 2002
+++ linux-2.5.25-rtt1/include/linux/sunrpc/xprt.h Tue Jul 16 11:43:08 2002
@@ -98,9 +98,8 @@
u32 rq_bytes_sent; /* Bytes we have sent */
-#ifdef RPC_PROFILE
- unsigned long rq_xtime; /* when transmitted */
-#endif
+ long rq_xtime; /* when transmitted */
+ int rq_nresend;
};
#define rq_svec rq_snd_buf.head
#define rq_slen rq_snd_buf.len
diff -u --recursive --new-file linux-2.5.25/net/sunrpc/Makefile linux-2.5.25-rtt1/net/sunrpc/Makefile
--- linux-2.5.25/net/sunrpc/Makefile Sat May 25 01:33:46 2002
+++ linux-2.5.25-rtt1/net/sunrpc/Makefile Tue Jul 16 11:46:29 2002
@@ -9,7 +9,8 @@
sunrpc-y := clnt.o xprt.o sched.o \
auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o \
- pmap_clnt.o xdr.o sunrpc_syms.o
+ pmap_clnt.o timer.o xdr.o \
+ sunrpc_syms.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
sunrpc-objs := $(sunrpc-y)
diff -u --recursive --new-file linux-2.5.25/net/sunrpc/clnt.c linux-2.5.25-rtt1/net/sunrpc/clnt.c
--- linux-2.5.25/net/sunrpc/clnt.c Sun Jul 7 19:43:16 2002
+++ linux-2.5.25-rtt1/net/sunrpc/clnt.c Tue Jul 16 11:43:08 2002
@@ -104,6 +104,8 @@
if (!clnt->cl_port)
clnt->cl_autobind = 1;
+ rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval);
+
if (!rpcauth_create(flavor, clnt))
goto out_no_auth;
diff -u --recursive --new-file linux-2.5.25/net/sunrpc/timer.c linux-2.5.25-rtt1/net/sunrpc/timer.c
--- linux-2.5.25/net/sunrpc/timer.c Thu Jan 1 01:00:00 1970
+++ linux-2.5.25-rtt1/net/sunrpc/timer.c Tue Jul 16 11:43:08 2002
@@ -0,0 +1,73 @@
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/unistd.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/timer.h>
+
+#define RPC_RTO_MAX (60*HZ)
+#define RPC_RTO_INIT (HZ/5)
+#define RPC_RTO_MIN (2)
+
+void
+rpc_init_rtt(struct rpc_rtt *rt, long timeo)
+{
+ long t = (timeo - RPC_RTO_INIT) << 3;
+ int i;
+ rt->timeo = timeo;
+ if (t < 0)
+ t = 0;
+ for (i = 0; i < 5; i++) {
+ rt->srtt[i] = t;
+ rt->sdrtt[i] = RPC_RTO_INIT;
+ }
+}
+
+void
+rpc_update_rtt(struct rpc_rtt *rt, int timer, long m)
+{
+ long *srtt, *sdrtt;
+
+ if (timer-- == 0)
+ return;
+
+ if (m == 0)
+ m = 1;
+ srtt = &rt->srtt[timer];
+ m -= *srtt >> 3;
+ *srtt += m;
+ if (m < 0)
+ m = -m;
+ sdrtt = &rt->sdrtt[timer];
+ m -= *sdrtt >> 2;
+ *sdrtt += m;
+ /* Set lower bound on the variance */
+ if (*sdrtt < RPC_RTO_MIN)
+ *sdrtt = RPC_RTO_MIN;
+}
+
+/*
+ * Estimate rto for an nfs rpc sent via. an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup,
+ * read, write, commit - A+4D
+ * other - timeo
+ */
+
+long
+rpc_calc_rto(struct rpc_rtt *rt, int timer)
+{
+ long res;
+ if (timer-- == 0)
+ return rt->timeo;
+ res = (rt->srtt[timer] >> 3) + rt->sdrtt[timer];
+ if (res > RPC_RTO_MAX)
+ res = RPC_RTO_MAX;
+ return res;
+}
diff -u --recursive --new-file linux-2.5.25/net/sunrpc/xprt.c linux-2.5.25-rtt1/net/sunrpc/xprt.c
--- linux-2.5.25/net/sunrpc/xprt.c Sun Jul 7 19:57:26 2002
+++ linux-2.5.25-rtt1/net/sunrpc/xprt.c Tue Jul 16 11:43:08 2002
@@ -480,13 +480,20 @@
* Complete reply received.
* The TCP code relies on us to remove the request from xprt->pending.
*/
-static inline void
+static void
xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
{
struct rpc_task *task = req->rq_task;
+ struct rpc_clnt *clnt = task->tk_client;
/* Adjust congestion window */
- xprt_adjust_cwnd(xprt, copied);
+ if (!xprt->nocong) {
+ xprt_adjust_cwnd(xprt, copied);
+ if (!req->rq_nresend) {
+ int timer = rpcproc_timer(clnt, task->tk_msg.rpc_proc);
+ if (timer)
+ rpc_update_rtt(&clnt->cl_rtt, timer, (long)jiffies - req->rq_xtime);
+ }
#ifdef RPC_PROFILE
/* Profile only reads for now */
@@ -934,6 +941,7 @@
spin_lock(&xprt->sock_lock);
if (req->rq_received)
goto out;
+ req->rq_nresend++;
xprt_adjust_cwnd(xprt, -ETIMEDOUT);
dprintk("RPC: %4d xprt_timer (%s request)\n",
@@ -982,15 +990,13 @@
if (!xprt_lock_write(xprt, task))
return;
-#ifdef RPC_PROFILE
- req->rq_xtime = jiffies;
-#endif
do_xprt_transmit(task);
}
static void
do_xprt_transmit(struct rpc_task *task)
{
+ struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
int status, retry = 0;
@@ -1002,6 +1008,7 @@
*/
while (1) {
xprt_clear_wspace(xprt);
+ req->rq_xtime = jiffies;
status = xprt_sendmsg(xprt, req);
if (status < 0)
@@ -1065,7 +1072,21 @@
out_receive:
dprintk("RPC: %4d xmit complete\n", task->tk_pid);
/* Set the task's receive timeout value */
- task->tk_timeout = req->rq_timeout.to_current;
+ if (!xprt->nocong) {
+ int backoff;
+ task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt,
+ rpcproc_timer(clnt, task->tk_msg.rpc_proc));
+ /* If we are retransmitting, increment the timeout counter */
+ backoff = req->rq_nresend;
+ if (backoff) {
+ if (backoff > 7)
+ backoff = 7;
+ task->tk_timeout <<= backoff;
+ }
+ if (task->tk_timeout > req->rq_timeout.to_maxval)
+ task->tk_timeout = req->rq_timeout.to_maxval;
+ } else
+ task->tk_timeout = req->rq_timeout.to_current;
spin_lock_bh(&xprt->sock_lock);
if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/