summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile5
-rw-r--r--fs/compat.c2
-rw-r--r--fs/lockd/host.c1
-rw-r--r--fs/lockd/mon.c1
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/lockd/svc4proc.c2
-rw-r--r--fs/lockd/svclock.c31
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/nfs/Kconfig8
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/callback_proc.c8
-rw-r--r--fs/nfs/client.c11
-rw-r--r--fs/nfs/dns_resolve.c6
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfs/inode.c3
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/nfs4filelayout.c280
-rw-r--r--fs/nfs/nfs4filelayout.h94
-rw-r--r--fs/nfs/nfs4filelayoutdev.c448
-rw-r--r--fs/nfs/nfs4proc.c218
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/nfs4xdr.c360
-rw-r--r--fs/nfs/pnfs.c783
-rw-r--r--fs/nfs/pnfs.h189
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfsd/Kconfig12
-rw-r--r--fs/nfsd/export.c73
-rw-r--r--fs/nfsd/nfs4callback.c245
-rw-r--r--fs/nfsd/nfs4idmap.c105
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4state.c493
-rw-r--r--fs/nfsd/nfs4xdr.c18
-rw-r--r--fs/nfsd/nfsctl.c26
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/state.h52
37 files changed, 3009 insertions, 503 deletions
diff --git a/fs/Makefile b/fs/Makefile
index e6ec1d309b1d..26956fcec917 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,10 +29,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
obj-$(CONFIG_AIO) += aio.o
obj-$(CONFIG_FILE_LOCKING) += locks.o
obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
-
-nfsd-$(CONFIG_NFSD) := nfsctl.o
-obj-y += $(nfsd-y) $(nfsd-m)
-
+obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
diff --git a/fs/compat.c b/fs/compat.c
index 0644a154672b..f03abdadc401 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1963,7 +1963,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
}
#endif /* HAVE_SET_RESTORE_SIGMASK */
-#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
+#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED)
/* Stuff for NFS server syscalls... */
struct compat_nfsctl_svc {
u16 svc32_port;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index bb464d12104c..25e21e4023b2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -353,6 +353,7 @@ nlm_bind_host(struct nlm_host *host)
.to_retries = 5U,
};
struct rpc_create_args args = {
+ .net = &init_net,
.protocol = host->h_proto,
.address = nlm_addr(host),
.addrsize = host->h_addrlen,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e3015464fbab..e0c918949644 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -69,6 +69,7 @@ static struct rpc_clnt *nsm_create(void)
.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
};
struct rpc_create_args args = {
+ .net = &init_net,
.protocol = XPRT_TRANSPORT_UDP,
.address = (struct sockaddr *)&sin,
.addrsize = sizeof(sin),
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f1bacf1a0391..b13aabc12298 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -206,7 +206,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
xprt = svc_find_xprt(serv, name, family, 0);
if (xprt == NULL)
- return svc_create_xprt(serv, name, family, port,
+ return svc_create_xprt(serv, name, &init_net, family, port,
SVC_SOCK_DEFAULTS);
svc_xprt_put(xprt);
return 0;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 031c6569a134..a336e832475d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -230,9 +230,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
static void nlm4svc_callback_release(void *data)
{
- lock_kernel();
nlm_release_call(data);
- unlock_kernel();
}
static const struct rpc_call_ops nlm4svc_callback_ops = {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 84055d31bfc5..6f1ef000975a 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -52,12 +52,13 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
* The list of blocked locks to retry
*/
static LIST_HEAD(nlm_blocked);
+static DEFINE_SPINLOCK(nlm_blocked_lock);
/*
* Insert a blocked lock into the global list
*/
static void
-nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
+nlmsvc_insert_block_locked(struct nlm_block *block, unsigned long when)
{
struct nlm_block *b;
struct list_head *pos;
@@ -87,6 +88,13 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
block->b_when = when;
}
+/*
+ * Locked wrapper: take nlm_blocked_lock around
+ * nlmsvc_insert_block_locked() for callers that do not already hold it.
+ */
+static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
+{
+ spin_lock(&nlm_blocked_lock);
+ nlmsvc_insert_block_locked(block, when);
+ spin_unlock(&nlm_blocked_lock);
+}
+
/*
* Remove a block from the global list
*/
@@ -94,7 +102,9 @@ static inline void
nlmsvc_remove_block(struct nlm_block *block)
{
if (!list_empty(&block->b_list)) {
+ spin_lock(&nlm_blocked_lock);
list_del_init(&block->b_list);
+ spin_unlock(&nlm_blocked_lock);
nlmsvc_release_block(block);
}
}
@@ -651,7 +661,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
struct nlm_block *block;
int rc = -ENOENT;
- lock_kernel();
+ spin_lock(&nlm_blocked_lock);
list_for_each_entry(block, &nlm_blocked, b_list) {
if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
@@ -665,13 +675,13 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
} else if (result == 0)
block->b_granted = 1;
- nlmsvc_insert_block(block, 0);
+ nlmsvc_insert_block_locked(block, 0);
svc_wake_up(block->b_daemon);
rc = 0;
break;
}
}
- unlock_kernel();
+ spin_unlock(&nlm_blocked_lock);
if (rc == -ENOENT)
printk(KERN_WARNING "lockd: grant for unknown block\n");
return rc;
@@ -803,7 +813,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
dprintk("lockd: GRANT_MSG RPC callback\n");
- lock_kernel();
+ spin_lock(&nlm_blocked_lock);
/* if the block is not on a list at this point then it has
* been invalidated. Don't try to requeue it.
*
@@ -825,19 +835,20 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
/* Call was successful, now wait for client callback */
timeout = 60 * HZ;
}
- nlmsvc_insert_block(block, timeout);
+ nlmsvc_insert_block_locked(block, timeout);
svc_wake_up(block->b_daemon);
out:
- unlock_kernel();
+ spin_unlock(&nlm_blocked_lock);
}
+/*
+ * FIXME: nlmsvc_release_block() grabs a mutex. This is not allowed for an
+ * .rpc_release rpc_call_op
+ */
static void nlmsvc_grant_release(void *data)
{
struct nlm_rqst *call = data;
-
- lock_kernel();
nlmsvc_release_block(call->a_block);
- unlock_kernel();
}
static const struct rpc_call_ops nlmsvc_grant_ops = {
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0f2ab741ae7c..c3069f38d602 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -260,9 +260,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
static void nlmsvc_callback_release(void *data)
{
- lock_kernel();
nlm_release_call(data);
- unlock_kernel();
}
static const struct rpc_call_ops nlmsvc_callback_ops = {
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 5c55c26af165..fd667652c502 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,13 +77,17 @@ config NFS_V4
config NFS_V4_1
bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
- depends on NFS_V4 && EXPERIMENTAL
+ depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+ select PNFS_FILE_LAYOUT
help
This option enables support for minor version 1 of the NFSv4 protocol
- (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
+ (RFC 5661) in the kernel's NFS client.
If unsure, say N.
+config PNFS_FILE_LAYOUT
+ tristate
+
config ROOT_NFS
bool "Root file system on NFS"
depends on NFS_FS=y && IP_PNP
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index da7fda639eac..4776ff9e3814 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
delegation.o idmap.o \
callback.o callback_xdr.o callback_proc.o \
nfs4namespace.o
+nfs-$(CONFIG_NFS_V4_1) += pnfs.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
+
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e17b49e2eabd..aeec017fe814 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv)
{
int ret;
- ret = svc_create_xprt(serv, "tcp", PF_INET,
+ ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret <= 0)
goto out_err;
@@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv)
dprintk("NFS: Callback listener port = %u (af %u)\n",
nfs_callback_tcpport, PF_INET);
- ret = svc_create_xprt(serv, "tcp", PF_INET6,
+ ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret > 0) {
nfs_callback_tcpport6 = ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 930d10fecdaf..2950fca0c61b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
if (delegation == NULL)
return 0;
- /* seqid is 4-bytes long */
- if (((u32 *) &stateid->data)[0] != 0)
+ if (stateid->stateid.seqid != 0)
return 0;
- if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
- sizeof(stateid->data)-4))
+ if (memcmp(&delegation->stateid.stateid.other,
+ &stateid->stateid.other,
+ NFS4_STATEID_OTHER_SIZE))
return 0;
return 1;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a882785eba41..0870d0d4efc0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
#include "iostat.h"
#include "internal.h"
#include "fscache.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
@@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
cred = rpc_lookup_machine_cred();
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
-
+#if defined(CONFIG_NFS_V4_1)
+ INIT_LIST_HEAD(&clp->cl_layouts);
+#endif
nfs_fscache_get_client_cookie(clp);
return clp;
@@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp)
nfs_free_client(clp);
}
}
+EXPORT_SYMBOL_GPL(nfs_put_client);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
@@ -601,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
{
struct rpc_clnt *clnt = NULL;
struct rpc_create_args args = {
+ .net = &init_net,
.protocol = clp->cl_proto,
.address = (struct sockaddr *)&clp->cl_addr,
.addrsize = clp->cl_addrlen,
@@ -900,6 +905,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
if (server->wsize > NFS_MAX_FILE_IO_SIZE)
server->wsize = NFS_MAX_FILE_IO_SIZE;
server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ set_pnfs_layoutdriver(server, fsinfo->layouttype);
+
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
@@ -939,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
}
fsinfo.fattr = fattr;
+ fsinfo.layouttype = 0;
error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
if (error < 0)
goto out_error;
@@ -1021,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server)
{
dprintk("--> nfs_free_server()\n");
+ unset_pnfs_layoutdriver(server);
spin_lock(&nfs_client_lock);
list_del(&server->client_link);
list_del(&server->master_link);
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index dba50a5625db..a6e711ad130f 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
return 0;
}
item = container_of(h, struct nfs_dns_ent, h);
- ttl = (long)item->h.expiry_time - (long)get_seconds();
+ ttl = item->h.expiry_time - seconds_since_boot();
if (ttl < 0)
ttl = 0;
@@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
ttl = get_expiry(&buf);
if (ttl == 0)
goto out;
- key.h.expiry_time = ttl + get_seconds();
+ key.h.expiry_time = ttl + seconds_since_boot();
ret = -ENOMEM;
item = nfs_dns_lookup(cd, &key);
@@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd,
goto out_err;
ret = -ETIMEDOUT;
if (!test_bit(CACHE_VALID, &(*item)->h.flags)
- || (*item)->h.expiry_time < get_seconds()
+ || (*item)->h.expiry_time < seconds_since_boot()
|| cd->flush_time > (*item)->h.last_refresh)
goto out_put;
ret = -ENOENT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e18c31e08a28..e756075637b0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -36,6 +36,7 @@
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_FILE
@@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
file->f_path.dentry->d_name.name,
mapping->host->i_ino, len, (long long) pos);
+ pnfs_update_layout(mapping->host,
+ nfs_file_open_context(file),
+ IOMODE_RW);
+
start:
/*
* Prevent starvation issues if someone is doing a consistency
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6eec28656415..314f57164602 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -48,6 +48,7 @@
#include "internal.h"
#include "fscache.h"
#include "dns_resolve.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -1410,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
+ pnfs_destroy_layout(NFS_I(inode));
/* If we are holding a delegation, return it! */
nfs_inode_return_delegation_noreclaim(inode);
/* First call standard NFS clear_inode() code */
@@ -1447,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
nfsi->delegation = NULL;
nfsi->delegation_state = 0;
init_rwsem(&nfsi->rwsem);
+ nfsi->layout = NULL;
#endif
}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d610203d95c6..eceafe74f473 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
.rpc_resp = &result,
};
struct rpc_create_args args = {
+ .net = &init_net,
.protocol = info->protocol,
.address = info->sap,
.addrsize = info->salen,
@@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
.to_retries = 2,
};
struct rpc_create_args args = {
+ .net = &init_net,
.protocol = IPPROTO_UDP,
.address = info->sap,
.addrsize = info->salen,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
new file mode 100644
index 000000000000..2e92f0d8d654
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.c
@@ -0,0 +1,280 @@
+/*
+ * Module for the pnfs nfs4 file layout driver.
+ * Defines all I/O and Policy interface operations, plus code
+ * to register itself with the pNFS client.
+ *
+ * Copyright (c) 2002
+ * The Regents of the University of Michigan
+ * All Rights Reserved
+ *
+ * Dean Hildebrand <dhildebz@umich.edu>
+ *
+ * Permission is granted to use, copy, create derivative works, and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization. If
+ * the above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation or warranty
+ * of any kind either express or implied, including without limitation
+ * the implied warranties of merchantability, fitness for a particular
+ * purpose, or noninfringement. The Regents of the University of
+ * Michigan shall not be liable for any damages, including special,
+ * indirect, incidental, or consequential damages, with respect to any
+ * claim arising out of or in connection with the use of the software,
+ * even if it has been or is hereafter advised of the possibility of
+ * such damages.
+ */
+
+#include <linux/nfs_fs.h>
+
+#include "internal.h"
+#include "nfs4filelayout.h"
+
+#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
+MODULE_DESCRIPTION("The NFSv4 file layout driver");
+
+/*
+ * Initialize the client's deviceid cache for the file layout driver,
+ * registering nfs4_fl_free_deviceid_callback as its free callback.
+ *
+ * Returns 0 on success; otherwise logs a warning and returns the status
+ * reported by pnfs_alloc_init_deviceid_cache().
+ */
+static int
+filelayout_set_layoutdriver(struct nfs_server *nfss)
+{
+	int err;
+
+	err = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
+					     nfs4_fl_free_deviceid_callback);
+	if (!err) {
+		dprintk("%s: deviceid cache has been initialized successfully\n",
+			__func__);
+		return 0;
+	}
+
+	printk(KERN_WARNING "%s: deviceid cache could not be "
+	       "initialized\n", __func__);
+	return err;
+}
+
+/*
+ * Clear out the layout driver state: hand the client's deviceid cache to
+ * pnfs_put_deviceid_cache() when one is present.  Always returns 0.
+ */
+static int
+filelayout_clear_layoutdriver(struct nfs_server *nfss)
+{
+	struct nfs_client *clp = nfss->nfs_client;
+
+	dprintk("--> %s\n", __func__);
+
+	if (!clp->cl_devid_cache)
+		return 0;
+
+	pnfs_put_deviceid_cache(clp);
+	return 0;
+}
+
+/*
+ * filelayout_check_layout()
+ *
+ * Make sure layout segment parameters are sane WRT the device.
+ * At this point no generic layer initialization of the lseg has occurred,
+ * and nothing has been added to the layout_hdr cache.
+ *
+ * Returns 0 when the segment is acceptable; fl->dsaddr then points at the
+ * referenced device.  Returns -EINVAL otherwise; if the device had already
+ * been looked up, it is released again with pnfs_put_deviceid() before
+ * returning.
+ */
+static int
+filelayout_check_layout(struct pnfs_layout_hdr *lo,
+			struct nfs4_filelayout_segment *fl,
+			struct nfs4_layoutget_res *lgr,
+			struct nfs4_deviceid *id)
+{
+	struct nfs4_file_layout_dsaddr *dsaddr;
+	int status = -EINVAL;
+	struct nfs_server *nfss = NFS_SERVER(lo->inode);
+
+	dprintk("--> %s\n", __func__);
+
+	if (fl->pattern_offset > lgr->range.offset) {
+		dprintk("%s pattern_offset %lld too large\n",
+				__func__, fl->pattern_offset);
+		goto out;
+	}
+
+	/* A zero stripe unit would pass the alignment test below */
+	if (!fl->stripe_unit) {
+		dprintk("%s Invalid stripe unit (%u)\n",
+			__func__, fl->stripe_unit);
+		goto out;
+	}
+
+	if (fl->stripe_unit % PAGE_SIZE) {
+		dprintk("%s Stripe unit (%u) not page aligned\n",
+			__func__, fl->stripe_unit);
+		goto out;
+	}
+
+	/* find and reference the deviceid */
+	dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
+	if (dsaddr == NULL) {
+		/* not cached yet: fall back to get_device_info() */
+		dsaddr = get_device_info(lo->inode, id);
+		if (dsaddr == NULL)
+			goto out;
+	}
+	fl->dsaddr = dsaddr;
+
+	/* first_stripe_index is unsigned, so only the upper bound matters */
+	if (fl->first_stripe_index >= dsaddr->stripe_count) {
+		dprintk("%s Bad first_stripe_index %d\n",
+				__func__, fl->first_stripe_index);
+		goto out_put;
+	}
+
+	if ((fl->stripe_type == STRIPE_SPARSE &&
+	    fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
+	    (fl->stripe_type == STRIPE_DENSE &&
+	    fl->num_fh != dsaddr->stripe_count)) {
+		dprintk("%s num_fh %u not valid for given packing\n",
+			__func__, fl->num_fh);
+		goto out_put;
+	}
+
+	/* Misalignment with rsize/wsize is only reported, not rejected */
+	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
+		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
+			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
+			nfss->wsize);
+	}
+
+	status = 0;
+out:
+	dprintk("--> %s returns %d\n", __func__, status);
+	return status;
+out_put:
+	/* drop the device reference taken above before reporting failure */
+	pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
+	goto out;
+}
+
+/*
+ * Free every filehandle in fl->fh_array, then the array itself, and
+ * reset fl->fh_array to NULL.
+ *
+ * Must tolerate fl->fh_array == NULL with fl->num_fh != 0: that is the
+ * state left behind when filelayout_decode_layout() fails (array
+ * allocation failed, or the array was already freed on its error path)
+ * and filelayout_alloc_lseg() then tears the segment down again.
+ */
+static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
+{
+	int i;
+
+	if (!fl->fh_array)
+		return;
+
+	for (i = 0; i < fl->num_fh; i++) {
+		/* slots are filled in order, so the first NULL ends the list */
+		if (!fl->fh_array[i])
+			break;
+		kfree(fl->fh_array[i]);
+	}
+	kfree(fl->fh_array);
+	fl->fh_array = NULL;
+}
+
+/* Free a file layout segment: first its filehandle array, then @fl itself */
+static void
+_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
+{
+ filelayout_free_fh_array(fl);
+ kfree(fl);
+}
+
+/*
+ * Decode the opaque layout body in lgr->layout.buf into @fl and copy the
+ * leading deviceid into @id.
+ *
+ * Returns 0 on success, -ENOMEM when the filehandle array or one of its
+ * entries cannot be allocated, and -EIO when a filehandle on the wire is
+ * larger than NFS_MAXFHSIZE.  When a failure occurs after the array was
+ * allocated, the array is freed again before returning.
+ *
+ * NOTE(review): the decoder advances through lgr->layout.buf without
+ * checking how many bytes the buffer holds - confirm the caller bounds it.
+ */
+static int
+filelayout_decode_layout(struct pnfs_layout_hdr *flo,
+			 struct nfs4_filelayout_segment *fl,
+			 struct nfs4_layoutget_res *lgr,
+			 struct nfs4_deviceid *id)
+{
+	uint32_t *p = (uint32_t *)lgr->layout.buf;
+	uint32_t nfl_util;
+	uint32_t fh_size;
+	int i;
+
+	dprintk("%s: set_layout_map Begin\n", __func__);
+
+	memcpy(id, p, sizeof(*id));
+	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
+	print_deviceid(id);
+
+	/* nfl_util packs the flag bits together with the stripe unit */
+	nfl_util = be32_to_cpup(p++);
+	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
+		fl->commit_through_mds = 1;
+	if (nfl_util & NFL4_UFLG_DENSE)
+		fl->stripe_type = STRIPE_DENSE;
+	else
+		fl->stripe_type = STRIPE_SPARSE;
+	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
+
+	fl->first_stripe_index = be32_to_cpup(p++);
+	p = xdr_decode_hyper(p, &fl->pattern_offset);
+	fl->num_fh = be32_to_cpup(p++);
+
+	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
+		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
+		fl->pattern_offset);
+
+	/* num_fh comes from the server: let kcalloc() catch overflow */
+	fl->fh_array = kcalloc(fl->num_fh, sizeof(struct nfs_fh *),
+			       GFP_KERNEL);
+	if (!fl->fh_array)
+		return -ENOMEM;
+
+	for (i = 0; i < fl->num_fh; i++) {
+		/* Do we want to use a mempool here? */
+		fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
+		if (!fl->fh_array[i]) {
+			filelayout_free_fh_array(fl);
+			return -ENOMEM;
+		}
+		/*
+		 * Validate the full 32-bit wire value before it is narrowed
+		 * into ->size, and compare against the capacity of ->data
+		 * (NFS_MAXFHSIZE) rather than sizeof(struct nfs_fh), which
+		 * also counts the size field.
+		 */
+		fh_size = be32_to_cpup(p++);
+		if (fh_size > NFS_MAXFHSIZE) {
+			printk(KERN_ERR "Too big fh %d received %u\n",
+			       i, fh_size);
+			filelayout_free_fh_array(fl);
+			return -EIO;
+		}
+		fl->fh_array[i]->size = fh_size;
+		memcpy(fl->fh_array[i]->data, p, fh_size);
+		p += XDR_QUADLEN(fh_size);
+		dprintk("DEBUG: %s: fh len %d\n", __func__,
+			fl->fh_array[i]->size);
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed file layout segment, decode the layout into it and
+ * validate the result.  Returns the embedded generic segment header, or
+ * NULL when allocation, decoding or validation fails.
+ */
+static struct pnfs_layout_segment *
+filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
+		      struct nfs4_layoutget_res *lgr)
+{
+	struct nfs4_deviceid id;
+	struct nfs4_filelayout_segment *fl;
+
+	dprintk("--> %s\n", __func__);
+	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
+	if (!fl)
+		return NULL;
+
+	if (filelayout_decode_layout(layoutid, fl, lgr, &id) != 0)
+		goto out_free;
+	/* only validate a layout that decoded cleanly */
+	if (filelayout_check_layout(layoutid, fl, lgr, &id))
+		goto out_free;
+
+	return &fl->generic_hdr;
+
+out_free:
+	_filelayout_free_lseg(fl);
+	return NULL;
+}
+
+/*
+ * Release a layout segment: put the deviceid held through fl->dsaddr,
+ * then free the segment and its filehandle array.
+ */
+static void
+filelayout_free_lseg(struct pnfs_layout_segment *lseg)
+{
+	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+	struct nfs_client *clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
+
+	dprintk("--> %s\n", __func__);
+	pnfs_put_deviceid(clp->cl_devid_cache, &fl->dsaddr->deviceid);
+	_filelayout_free_lseg(fl);
+}
+
+/*
+ * Layout driver descriptor passed to pnfs_register_layoutdriver() and
+ * pnfs_unregister_layoutdriver() by the module init/exit routines below.
+ */
+static struct pnfs_layoutdriver_type filelayout_type = {
+ .id = LAYOUT_NFSV4_1_FILES,
+ .name = "LAYOUT_NFSV4_1_FILES",
+ .owner = THIS_MODULE,
+ .set_layoutdriver = filelayout_set_layoutdriver,
+ .clear_layoutdriver = filelayout_clear_layoutdriver,
+ .alloc_lseg = filelayout_alloc_lseg,
+ .free_lseg = filelayout_free_lseg,
+};
+
+/*
+ * Module init: log a notice and register filelayout_type with the pNFS
+ * client; the registration status is returned as the init result.
+ */
+static int __init nfs4filelayout_init(void)
+{
+ printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
+ __func__);
+ return pnfs_register_layoutdriver(&filelayout_type);
+}
+
+/* Module exit: log a notice and unregister filelayout_type */
+static void __exit nfs4filelayout_exit(void)
+{
+ printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
+ __func__);
+ pnfs_unregister_layoutdriver(&filelayout_type);
+}
+
+module_init(nfs4filelayout_init);
+module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
new file mode 100644
index 000000000000..bbf60dd2ab9d
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.h
@@ -0,0 +1,94 @@
+/*
+ * NFSv4 file layout driver data structures.
+ *
+ * Copyright (c) 2002
+ * The Regents of the University of Michigan
+ * All Rights Reserved
+ *
+ * Dean Hildebrand <dhildebz@umich.edu>
+ *
+ * Permission is granted to use, copy, create derivative works, and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization. If
+ * the above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation or warranty
+ * of any kind either express or implied, including without limitation
+ * the implied warranties of merchantability, fitness for a particular
+ * purpose, or noninfringement. The Regents of the University of
+ * Michigan shall not be liable for any damages, including special,
+ * indirect, incidental, or consequential damages, with respect to any
+ * claim arising out of or in connection with the use of the software,
+ * even if it has been or is hereafter advised of the possibility of
+ * such damages.
+ */
+
+#ifndef FS_NFS_NFS4FILELAYOUT_H
+#define FS_NFS_NFS4FILELAYOUT_H
+
+#include "pnfs.h"
+
+/*
+ * Field testing shows we need to support up to 4096 stripe indices.
+ * We store each index as a u8 (u32 on the wire) to keep the memory footprint
+ * reasonable. This in turn means we support a maximum of 256
+ * RFC 5661 multipath_list4 structures.
+ */
+#define NFS4_PNFS_MAX_STRIPE_CNT 4096
+#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
+
+/*
+ * Values stored in nfs4_filelayout_segment.stripe_type; the layout decoder
+ * picks STRIPE_DENSE when NFL4_UFLG_DENSE is set and STRIPE_SPARSE otherwise.
+ */
+enum stripetype4 {
+ STRIPE_SPARSE = 1,
+ STRIPE_DENSE = 2
+};
+
+/* Individual ip address */
+struct nfs4_pnfs_ds {
+ struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
+ u32 ds_ip_addr;
+ u32 ds_port;
+ /* NOTE(review): presumably the nfs_client used to reach this address - confirm */
+ struct nfs_client *ds_clp;
+ /* reference count, see the "Data server cache" notes in nfs4filelayoutdev.c */
+ atomic_t ds_count;
+};
+
+/*
+ * Device address information for one deviceid.  filelayout_check_layout()
+ * validates a segment's first_stripe_index and num_fh against stripe_count
+ * and ds_num.
+ */
+struct nfs4_file_layout_dsaddr {
+ struct pnfs_deviceid_node deviceid;
+ u32 stripe_count;
+ /* each stripe index is stored as a u8, see NFS4_PNFS_MAX_MULTI_CNT */
+ u8 *stripe_indices;
+ u32 ds_num;
+ /* NOTE(review): declared with one slot; presumably allocated with room for ds_num entries - confirm against the allocator */
+ struct nfs4_pnfs_ds *ds_list[1];
+};
+
+/*
+ * File layout segment.  generic_hdr is the embedded generic segment that
+ * FILELAYOUT_LSEG() maps back to this structure.
+ */
+struct nfs4_filelayout_segment {
+ struct pnfs_layout_segment generic_hdr;
+ /* STRIPE_SPARSE or STRIPE_DENSE */
+ u32 stripe_type;
+ /* set to 1 when the layout carries NFL4_UFLG_COMMIT_THRU_MDS */
+ u32 commit_through_mds;
+ /* nfl_util with the NFL4_UFLG_MASK flag bits cleared */
+ u32 stripe_unit;
+ u32 first_stripe_index;
+ u64 pattern_offset;
+ struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
+ /* number of entries in fh_array */
+ unsigned int num_fh;
+ struct nfs_fh **fh_array;
+};
+
+/*
+ * Map a generic layout segment back to the file layout segment that
+ * embeds it as generic_hdr.
+ */
+static inline struct nfs4_filelayout_segment *
+FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
+{
+ return container_of(lseg,
+ struct nfs4_filelayout_segment,
+ generic_hdr);
+}
+
+extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
+extern void print_ds(struct nfs4_pnfs_ds *ds);
+extern void print_deviceid(struct nfs4_deviceid *dev_id);
+extern struct nfs4_file_layout_dsaddr *
+nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
+struct nfs4_file_layout_dsaddr *
+get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
+
+#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
new file mode 100644
index 000000000000..51fe64ace55a
--- /dev/null
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -0,0 +1,448 @@
+/*
+ * Device operations for the pnfs nfs4 file layout driver.
+ *
+ * Copyright (c) 2002
+ * The Regents of the University of Michigan
+ * All Rights Reserved
+ *
+ * Dean Hildebrand <dhildebz@umich.edu>
+ * Garth Goodson <Garth.Goodson@netapp.com>
+ *
+ * Permission is granted to use, copy, create derivative works, and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization. If
+ * the above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation or warranty
+ * of any kind either express or implied, including without limitation
+ * the implied warranties of merchantability, fitness for a particular
+ * purpose, or noninfringement. The Regents of the University of
+ * Michigan shall not be liable for any damages, including special,
+ * indirect, incidental, or consequential damages, with respect to any
+ * claim arising out of or in connection with the use of the software,
+ * even if it has been or is hereafter advised of the possibility of
+ * such damages.
+ */
+
+#include <linux/nfs_fs.h>
+#include <linux/vmalloc.h>
+
+#include "internal.h"
+#include "nfs4filelayout.h"
+
+#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+
+/*
+ * Data server cache
+ *
+ * Data servers can be mapped to different device ids.
+ * nfs4_pnfs_ds reference counting
+ * - set to 1 on allocation
+ * - incremented when a device id maps a data server already in the cache.
+ * - decremented when deviceid is removed from the cache.
+ */
+DEFINE_SPINLOCK(nfs4_ds_cache_lock);
+static LIST_HEAD(nfs4_data_server_cache);
+
+/* Debug routines */
+
+/* Print one data server cache entry, or a note when @ds is NULL. */
+void
+print_ds(struct nfs4_pnfs_ds *ds)
+{
+	u32 exchange_flags = 0;
+
+	if (!ds) {
+		printk("%s NULL device\n", __func__);
+		return;
+	}
+
+	/* The entry may not have an nfs_client attached; print 0 then. */
+	if (ds->ds_clp)
+		exchange_flags = ds->ds_clp->cl_exchange_flags;
+
+	printk(" ip_addr %x port %hu\n"
+	       " ref count %d\n"
+	       " client %p\n"
+	       " cl_exchange_flags %x\n",
+	       ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+	       atomic_read(&ds->ds_count), ds->ds_clp,
+	       exchange_flags);
+}
+
+/* When debugging is enabled, print every data server of @dsaddr. */
+void
+print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
+{
+	int idx = 0;
+
+	ifdebug(FACILITY) {
+		printk("%s dsaddr->ds_num %d\n", __func__,
+		       dsaddr->ds_num);
+		while (idx < dsaddr->ds_num) {
+			print_ds(dsaddr->ds_list[idx]);
+			idx++;
+		}
+	}
+}
+
+/* Debug: print the device id as four 32-bit words in hex. */
+void print_deviceid(struct nfs4_deviceid *id)
+{
+	u32 *words = (u32 *)id;
+
+	dprintk("%s: device id= [%x%x%x%x]\n", __func__,
+		words[0], words[1], words[2], words[3]);
+}
+
+/*
+ * Search the data server cache for an entry with this address and port.
+ * Returns the entry, or NULL when there is none.
+ * nfs4_ds_cache_lock is held
+ */
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(u32 ip_addr, u32 port)
+{
+	struct nfs4_pnfs_ds *entry;
+	struct nfs4_pnfs_ds *match = NULL;
+
+	dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
+		ntohl(ip_addr), ntohs(port));
+
+	list_for_each_entry(entry, &nfs4_data_server_cache, ds_node) {
+		if (entry->ds_ip_addr != ip_addr)
+			continue;
+		if (entry->ds_port != port)
+			continue;
+		match = entry;
+		break;
+	}
+	return match;
+}
+
+/* Call nfs_put_client() on the entry's client, if any, then free the entry. */
+static void
+destroy_ds(struct nfs4_pnfs_ds *ds)
+{
+	struct nfs_client *clp;
+
+	dprintk("--> %s\n", __func__);
+	ifdebug(FACILITY)
+		print_ds(ds);
+
+	clp = ds->ds_clp;
+	if (clp != NULL)
+		nfs_put_client(clp);
+	kfree(ds);
+}
+
+/*
+ * Drop this device's reference on each of its data servers, destroying any
+ * whose count reaches zero, then free the stripe index array and the device.
+ */
+static void
+nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
+{
+	int idx;
+
+	print_deviceid(&dsaddr->deviceid.de_id);
+
+	for (idx = 0; idx < dsaddr->ds_num; idx++) {
+		struct nfs4_pnfs_ds *ds = dsaddr->ds_list[idx];
+
+		if (!ds)
+			continue;
+		/* Returns with nfs4_ds_cache_lock held only on the last put */
+		if (!atomic_dec_and_lock(&ds->ds_count, &nfs4_ds_cache_lock))
+			continue;
+		list_del_init(&ds->ds_node);
+		spin_unlock(&nfs4_ds_cache_lock);
+		destroy_ds(ds);
+	}
+	kfree(dsaddr->stripe_indices);
+	kfree(dsaddr);
+}
+
+/* Free the file layout device that embeds the generic node @device. */
+void
+nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
+{
+	nfs4_fl_free_deviceid(container_of(device,
+					   struct nfs4_file_layout_dsaddr,
+					   deviceid));
+}
+
+/*
+ * Return the cached data server for (ip_addr, port), taking an extra
+ * reference, or insert a new entry with a count of one.
+ * Returns NULL if the new entry cannot be allocated.
+ */
+static struct nfs4_pnfs_ds *
+nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
+{
+	struct nfs4_pnfs_ds *cached;
+	struct nfs4_pnfs_ds *ds;
+
+	/* The allocation is done before nfs4_ds_cache_lock is taken */
+	ds = kzalloc(sizeof(*ds), GFP_KERNEL);
+	if (ds == NULL)
+		return NULL;
+
+	spin_lock(&nfs4_ds_cache_lock);
+	cached = _data_server_lookup_locked(ip_addr, port);
+	if (cached != NULL) {
+		/* Already known: discard the spare and share the entry */
+		kfree(ds);
+		atomic_inc(&cached->ds_count);
+		dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
+			__func__, cached->ds_ip_addr,
+			atomic_read(&cached->ds_count));
+		ds = cached;
+	} else {
+		ds->ds_ip_addr = ip_addr;
+		ds->ds_port = port;
+		atomic_set(&ds->ds_count, 1);
+		INIT_LIST_HEAD(&ds->ds_node);
+		ds->ds_clp = NULL;
+		list_add(&ds->ds_node, &nfs4_data_server_cache);
+		dprintk("%s add new data server ip 0x%x\n", __func__,
+			ds->ds_ip_addr);
+	}
+	spin_unlock(&nfs4_ds_cache_lock);
+	return ds;
+}
+
+/*
+ * Decode one (r_netid, r_addr) pair at *pp and look up or add the matching
+ * data server in the cache.
+ *
+ * *pp is always advanced past both XDR opaques, even on failure.
+ * Returns the data server, or NULL when the netid is not "tcp", the address
+ * is malformed or not ipv4, or memory cannot be allocated.
+ *
+ * Currently only support ipv4, and one multi-path address.
+ */
+static struct nfs4_pnfs_ds *
+decode_and_add_ds(__be32 **pp, struct inode *inode)
+{
+	struct nfs4_pnfs_ds *ds = NULL;
+	char *buf;
+	const char *ipend;
+	u32 ip_addr, port;
+	int nlen, rlen, i;
+	int tmp[2];
+	__be32 *r_netid, *r_addr, *p = *pp;
+
+	/* r_netid */
+	nlen = be32_to_cpup(p++);
+	r_netid = p;
+	p += XDR_QUADLEN(nlen);
+
+	/* r_addr */
+	rlen = be32_to_cpup(p++);
+	r_addr = p;
+	p += XDR_QUADLEN(rlen);
+	*pp = p;
+
+	/* Check that netid is "tcp" */
+	if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
+		dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
+		goto out_err;
+	}
+
+	/*
+	 * ipv6 length plus port is legal.  rlen comes off the wire as a
+	 * signed int, so a negative value must be rejected as well.
+	 */
+	if (rlen < 0 || rlen > INET6_ADDRSTRLEN + 8) {
+		dprintk("%s Invalid address, length %d\n", __func__,
+			rlen);
+		goto out_err;
+	}
+	buf = kmalloc(rlen + 1, GFP_KERNEL);
+	if (!buf) {
+		dprintk("%s: Not enough memory\n", __func__);
+		goto out_err;
+	}
+	buf[rlen] = '\0';
+	memcpy(buf, r_addr, rlen);
+
+	/* replace the port dots with dashes for the in4_pton() delimiter*/
+	for (i = 0; i < 2; i++) {
+		char *res = strrchr(buf, '.');
+
+		if (!res) {
+			dprintk("%s: Failed finding expected dots in port\n",
+				__func__);
+			goto out_free;
+		}
+		*res = '-';
+	}
+
+	/* Currently only support ipv4 address */
+	if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
+		dprintk("%s: Only ipv4 addresses supported\n", __func__);
+		goto out_free;
+	}
+
+	/* port: two decimal octets, high byte first */
+	if (sscanf(ipend, "-%d-%d", &tmp[0], &tmp[1]) != 2 ||
+	    tmp[0] < 0 || tmp[0] > 255 || tmp[1] < 0 || tmp[1] > 255) {
+		dprintk("%s: Invalid port in address %s\n", __func__, buf);
+		goto out_free;
+	}
+	port = htons((tmp[0] << 8) | (tmp[1]));
+
+	ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
+	dprintk("%s Decoded address and port %s\n", __func__, buf);
+out_free:
+	kfree(buf);
+out_err:
+	return ds;
+}
+
+/*
+ * Decode opaque device data and return the result.
+ *
+ * The buffer at pdev->area holds the stripe index count, the stripe indices,
+ * the multipath list count, and then one multipath list per data server.
+ * Only the first address of each multipath list is used.
+ *
+ * Returns a newly allocated nfs4_file_layout_dsaddr, or NULL on a malformed
+ * or unsupported device or when memory cannot be allocated.
+ *
+ * NOTE(review): nothing here bounds 'p' against the amount of data actually
+ * received; confirm that the caller guarantees the buffer is large enough.
+ */
+static struct nfs4_file_layout_dsaddr*
+decode_device(struct inode *ino, struct pnfs_device *pdev)
+{
+	int i, dummy;
+	u32 cnt, num;
+	__be32 *p = (__be32 *)pdev->area, *indicesp;
+	struct nfs4_file_layout_dsaddr *dsaddr;
+
+	/* Get the stripe count (number of stripe index) */
+	cnt = be32_to_cpup(p++);
+	dprintk("%s stripe count %d\n", __func__, cnt);
+	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
+		printk(KERN_WARNING "%s: stripe count %d greater than "
+		       "supported maximum %d\n", __func__,
+		       cnt, NFS4_PNFS_MAX_STRIPE_CNT);
+		goto out_err;
+	}
+
+	/* Check the multipath list count */
+	indicesp = p;
+	p += XDR_QUADLEN(cnt << 2);
+	num = be32_to_cpup(p++);
+	dprintk("%s ds_num %u\n", __func__, num);
+	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
+		printk(KERN_WARNING "%s: multipath count %d greater than "
+		       "supported maximum %d\n", __func__,
+		       num, NFS4_PNFS_MAX_MULTI_CNT);
+		goto out_err;
+	}
+	/* A zero count would make (num - 1) wrap in the size below */
+	if (num == 0) {
+		dprintk("%s: empty multipath list\n", __func__);
+		goto out_err;
+	}
+	dsaddr = kzalloc(sizeof(*dsaddr) +
+			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
+			GFP_KERNEL);
+	if (!dsaddr)
+		goto out_err;
+
+	dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
+	if (!dsaddr->stripe_indices)
+		goto out_err_free;
+
+	dsaddr->stripe_count = cnt;
+	dsaddr->ds_num = num;
+
+	memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
+
+	/* Go back and read stripe indices */
+	p = indicesp;
+	for (i = 0; i < dsaddr->stripe_count; i++) {
+		u32 idx = be32_to_cpup(p++);
+
+		/*
+		 * Check the full 32-bit wire value before it is narrowed to
+		 * a u8, so that e.g. 256 is rejected instead of aliasing 0.
+		 */
+		if (idx >= num)
+			goto out_err_free;
+		dsaddr->stripe_indices[i] = idx;
+	}
+	/* Skip already read multipath list count */
+	p++;
+
+	for (i = 0; i < dsaddr->ds_num; i++) {
+		int j;
+
+		/*
+		 * NOTE(review): a count below 1 leaves ds_list[i] NULL;
+		 * confirm that users of ds_list tolerate that.
+		 */
+		dummy = be32_to_cpup(p++); /* multipath count */
+		if (dummy > 1) {
+			printk(KERN_WARNING
+			       "%s: Multipath count %d not supported, "
+			       "skipping all greater than 1\n", __func__,
+				dummy);
+		}
+		for (j = 0; j < dummy; j++) {
+			if (j == 0) {
+				dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
+				if (dsaddr->ds_list[i] == NULL)
+					goto out_err_free;
+			} else {
+				u32 len;
+				/* skip extra multipath */
+				len = be32_to_cpup(p++);
+				p += XDR_QUADLEN(len);
+				len = be32_to_cpup(p++);
+				p += XDR_QUADLEN(len);
+			}
+		}
+	}
+	return dsaddr;
+
+out_err_free:
+	nfs4_fl_free_deviceid(dsaddr);
+out_err:
+	dprintk("%s ERROR: returning NULL\n", __func__);
+	return NULL;
+}
+
+/*
+ * Decode the opaque device specified in 'dev' and hand it to
+ * pnfs_add_deviceid() for the client's device id cache.
+ * Per the original note, if the deviceid is already cached the add routine
+ * returns a pointer to the cached struct and throws away the new one.
+ * Returns NULL when the device cannot be decoded.
+ */
+static struct nfs4_file_layout_dsaddr*
+decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
+{
+	struct nfs4_file_layout_dsaddr *decoded = decode_device(inode, dev);
+	struct pnfs_deviceid_node *node;
+
+	if (decoded == NULL) {
+		printk(KERN_WARNING "%s: Could not decode or add device\n",
+			__func__);
+		return NULL;
+	}
+
+	node = pnfs_add_deviceid(
+			NFS_SERVER(inode)->nfs_client->cl_devid_cache,
+			&decoded->deviceid);
+	return container_of(node, struct nfs4_file_layout_dsaddr, deviceid);
+}
+
+/*
+ * Retrieve the information for dev_id, add it to the list
+ * of available devices, and return it.
+ *
+ * Allocates enough pages to hold the session's maximum response size, maps
+ * them contiguously, issues GETDEVICEINFO into them and decodes the reply.
+ * All temporary buffers are released before returning.
+ * Returns the device, or NULL on allocation, RPC or decode failure.
+ */
+struct nfs4_file_layout_dsaddr *
+get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
+{
+	struct pnfs_device *pdev = NULL;
+	u32 max_resp_sz;
+	int max_pages;
+	struct page **pages = NULL;
+	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
+	int rc, i;
+	struct nfs_server *server = NFS_SERVER(inode);
+
+	/*
+	 * Use the session max response size as the basis for setting
+	 * GETDEVICEINFO's maxcount
+	 */
+	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
+	max_pages = max_resp_sz >> PAGE_SHIFT;
+	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
+		__func__, inode, max_resp_sz, max_pages);
+
+	pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
+	if (pdev == NULL)
+		return NULL;
+
+	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
+	if (pages == NULL) {
+		kfree(pdev);
+		return NULL;
+	}
+	for (i = 0; i < max_pages; i++) {
+		pages[i] = alloc_page(GFP_KERNEL);
+		if (!pages[i])
+			goto out_free;
+	}
+
+	/* set pdev->area */
+	pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
+	if (!pdev->area)
+		goto out_free;
+
+	memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
+	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
+	pdev->pages = pages;
+	pdev->pgbase = 0;
+	pdev->pglen = PAGE_SIZE * max_pages;
+	pdev->mincount = 0;
+
+	rc = nfs4_proc_getdeviceinfo(server, pdev);
+	dprintk("%s getdevice info returns %d\n", __func__, rc);
+	if (rc)
+		goto out_free;
+
+	/*
+	 * Found new device, need to decode it and then add it to the
+	 * list of known devices for this mountpoint.
+	 */
+	dsaddr = decode_and_add_device(inode, pdev);
+out_free:
+	if (pdev->area != NULL)
+		vunmap(pdev->area);
+	for (i = 0; i < max_pages; i++) {
+		/*
+		 * pages[] was zeroed and is filled in order, so the first
+		 * NULL slot marks where a failed allocation stopped.
+		 * __free_page() must not be handed a NULL page.
+		 */
+		if (pages[i] == NULL)
+			break;
+		__free_page(pages[i]);
+	}
+	kfree(pages);
+	kfree(pdev);
+	dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
+	return dsaddr;
+}
+
+/*
+ * Look up @id in the client's device id cache via pnfs_find_get_deviceid().
+ * Returns the file layout device embedding the node, or NULL if not found.
+ */
+struct nfs4_file_layout_dsaddr *
+nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
+{
+	struct pnfs_deviceid_node *node;
+
+	node = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
+	if (node == NULL)
+		return NULL;
+	return container_of(node, struct nfs4_file_layout_dsaddr, deviceid);
+}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e87fe612ca18..32c8758c99fd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
#include "internal.h"
#include "iostat.h"
#include "callback.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -130,6 +131,7 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
| FATTR4_WORD0_MAXWRITE
| FATTR4_WORD0_LEASE_TIME,
FATTR4_WORD1_TIME_DELTA
+ | FATTR4_WORD1_FS_LAYOUT_TYPES
};
const u32 nfs4_fs_locations_bitmap[2] = {
@@ -4840,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
args->bc_attrs.max_reqs);
}
-static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd)
+/*
+ * Check the fore channel attributes stored in the session
+ * (session->fc_attrs) against the ones we sent (args->fc_attrs).
+ * Returns 0 if they are acceptable, -EINVAL otherwise.
+ */
+static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
{
- if (rcvd <= sent)
- return 0;
- printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. "
- "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd);
- return -EINVAL;
+ struct nfs4_channel_attrs *sent = &args->fc_attrs;
+ struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
+
+ if (rcvd->headerpadsz > sent->headerpadsz)
+ return -EINVAL;
+ if (rcvd->max_resp_sz > sent->max_resp_sz)
+ return -EINVAL;
+ /*
+ * Our requested max_ops is the minimum we need; we're not
+ * prepared to break up compounds into smaller pieces than that.
+ * So, no point even trying to continue if the server won't
+ * cooperate:
+ */
+ if (rcvd->max_ops < sent->max_ops)
+ return -EINVAL;
+ if (rcvd->max_reqs == 0)
+ return -EINVAL;
+ return 0;
}
-#define _verify_fore_channel_attr(_name_) \
- _verify_channel_attr("fore", #_name_, \
- args->fc_attrs._name_, \
- session->fc_attrs._name_)
+/*
+ * Check the back channel attributes stored in the session
+ * (session->bc_attrs) against the ones we sent (args->bc_attrs).
+ * Returns 0 if they are acceptable, -EINVAL otherwise.
+ */
+static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
+{
+ struct nfs4_channel_attrs *sent = &args->bc_attrs;
+ struct nfs4_channel_attrs *rcvd = &session->bc_attrs;
-#define _verify_back_channel_attr(_name_) \
- _verify_channel_attr("back", #_name_, \
- args->bc_attrs._name_, \
- session->bc_attrs._name_)
+ if (rcvd->max_rqst_sz > sent->max_rqst_sz)
+ return -EINVAL;
+ if (rcvd->max_resp_sz < sent->max_resp_sz)
+ return -EINVAL;
+ if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
+ return -EINVAL;
+ /* These would render the backchannel useless: */
+ if (rcvd->max_ops == 0)
+ return -EINVAL;
+ if (rcvd->max_reqs == 0)
+ return -EINVAL;
+ return 0;
+}
-/*
- * The server is not allowed to increase the fore channel header pad size,
- * maximum response size, or maximum number of operations.
- *
- * The back channel attributes are only negotiatied down: We send what the
- * (back channel) server insists upon.
- */
+/*
+ * Verify the negotiated session attributes: fore channel first, then the
+ * back channel.  Returns the first non-zero result, or 0 if both pass.
+ */
static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
struct nfs4_session *session)
{
- int ret = 0;
-
- ret |= _verify_fore_channel_attr(headerpadsz);
- ret |= _verify_fore_channel_attr(max_resp_sz);
- ret |= _verify_fore_channel_attr(max_ops);
-
- ret |= _verify_back_channel_attr(headerpadsz);
- ret |= _verify_back_channel_attr(max_rqst_sz);
- ret |= _verify_back_channel_attr(max_resp_sz);
- ret |= _verify_back_channel_attr(max_resp_sz_cached);
- ret |= _verify_back_channel_attr(max_ops);
- ret |= _verify_back_channel_attr(max_reqs);
+ int ret;
- return ret;
+ ret = nfs4_verify_fore_channel_attrs(args, session);
+ if (ret)
+ return ret;
+ return nfs4_verify_back_channel_attrs(args, session);
}
static int _nfs4_proc_create_session(struct nfs_client *clp)
@@ -5255,6 +5264,147 @@ out:
dprintk("<-- %s status=%d\n", __func__, status);
return status;
}
+
+/*
+ * rpc_call_prepare callback for LAYOUTGET: set up the session sequence and
+ * start the call only when nfs4_setup_sequence() returns zero.
+ */
+static void
+nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+	int ret;
+
+	dprintk("--> %s\n", __func__);
+	ret = nfs4_setup_sequence(server, &lgp->args.seq_args,
+				  &lgp->res.seq_res, 0, task);
+	if (ret == 0)
+		rpc_call_start(task);
+}
+
+/*
+ * rpc_call_done callback for LAYOUTGET.
+ *
+ * LAYOUTTRYLATER and RECALLCONFLICT are mapped to NFS4ERR_DELAY before the
+ * common error handler runs; when that handler returns -EAGAIN the call is
+ * restarted.  Otherwise the final task status is recorded in lgp->status.
+ */
+static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+
+	dprintk("--> %s\n", __func__);
+
+	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
+		return;
+
+	if (task->tk_status != 0) {
+		if (task->tk_status == -NFS4ERR_LAYOUTTRYLATER ||
+		    task->tk_status == -NFS4ERR_RECALLCONFLICT)
+			task->tk_status = -NFS4ERR_DELAY;
+
+		if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+			rpc_restart_call_prepare(task);
+			return;
+		}
+	}
+	lgp->status = task->tk_status;
+	dprintk("<-- %s\n", __func__);
+}
+
+/*
+ * rpc_release callback for LAYOUTGET: put the layout header and the open
+ * context, free the reply page if one was allocated, and free the
+ * nfs4_layoutget itself.
+ */
+static void nfs4_layoutget_release(void *calldata)
+{
+	struct nfs4_layoutget *lgp = calldata;
+
+	dprintk("--> %s\n", __func__);
+	put_layout_hdr(lgp->args.inode);
+	if (lgp->res.layout.buf)
+		free_page((unsigned long) lgp->res.layout.buf);
+	put_nfs_open_context(lgp->args.ctx);
+	kfree(lgp);
+	dprintk("<-- %s\n", __func__);
+}
+
+/* RPC callbacks used by nfs4_proc_layoutget() for the LAYOUTGET task. */
+static const struct rpc_call_ops nfs4_layoutget_call_ops = {
+ .rpc_call_prepare = nfs4_layoutget_prepare,
+ .rpc_call_done = nfs4_layoutget_done,
+ .rpc_release = nfs4_layoutget_release,
+};
+
+/*
+ * Issue LAYOUTGET as an async RPC task, wait for it to complete, and pass a
+ * successful reply to pnfs_layout_process().
+ *
+ * A single page is allocated to receive the opaque layout; it is freed by
+ * nfs4_layoutget_release().  Returns 0 on success, -ENOMEM if that page
+ * cannot be allocated, or the error from the RPC / layout processing.
+ *
+ * NOTE(review): on the IS_ERR(task) path nothing here releases lgp;
+ * presumably rpc_run_task() invokes the release callback itself -- confirm.
+ */
+int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
+{
+ struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
+ .rpc_argp = &lgp->args,
+ .rpc_resp = &lgp->res,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_layoutget_call_ops,
+ .callback_data = lgp,
+ .flags = RPC_TASK_ASYNC,
+ };
+ int status = 0;
+
+ dprintk("--> %s\n", __func__);
+
+ lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
+ if (lgp->res.layout.buf == NULL) {
+ /* No task was started, so run the release callback by hand */
+ nfs4_layoutget_release(lgp);
+ return -ENOMEM;
+ }
+
+ lgp->res.seq_res.sr_slot = NULL;
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = nfs4_wait_for_completion_rpc_task(task);
+ if (status != 0)
+ goto out;
+ /* lgp->status was recorded by nfs4_layoutget_done() */
+ status = lgp->status;
+ if (status != 0)
+ goto out;
+ status = pnfs_layout_process(lgp);
+out:
+ rpc_put_task(task);
+ dprintk("<-- %s status=%d\n", __func__, status);
+ return status;
+}
+
+/*
+ * Perform one synchronous GETDEVICEINFO call for @pdev, which is used as
+ * both the argument and the result.  Returns the nfs4_call_sync() status.
+ */
+static int
+_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
+{
+	struct nfs4_getdeviceinfo_args gdi_args = {
+		.pdev = pdev,
+	};
+	struct nfs4_getdeviceinfo_res gdi_res = {
+		.pdev = pdev,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
+		.rpc_argp = &gdi_args,
+		.rpc_resp = &gdi_res,
+	};
+	int rc;
+
+	dprintk("--> %s\n", __func__);
+	rc = nfs4_call_sync(server, &msg, &gdi_args, &gdi_res, 0);
+	dprintk("<-- %s status=%d\n", __func__, rc);
+
+	return rc;
+}
+
+/*
+ * GETDEVICEINFO with exception handling: repeat the call for as long as
+ * nfs4_handle_exception() sets exception.retry.
+ */
+int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	for (;;) {
+		err = _nfs4_proc_getdeviceinfo(server, pdev);
+		err = nfs4_handle_exception(server, err, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
+
#endif /* CONFIG_NFS_V4_1 */
struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index aa0b02a610c4..f575a3126737 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -54,6 +54,7 @@
#include "callback.h"
#include "delegation.h"
#include "internal.h"
+#include "pnfs.h"
#define OPENOWNER_POOL_SIZE 8
@@ -1475,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
}
clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+ pnfs_destroy_all_layouts(clp);
}
if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index bd2101d918c8..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
#include <linux/nfs_idmap.h>
#include "nfs4_fs.h"
#include "internal.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_XDR
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int);
XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4)
#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4)
+#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
+ XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
+#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
+ 1 /* layout type */ + \
+ 1 /* opaque devaddr4 length */ + \
+ /* devaddr4 payload is read into page */ \
+ 1 /* notification bitmap length */ + \
+ 1 /* notification bitmap */)
+#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
+ encode_stateid_maxsz)
+#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
+ decode_stateid_maxsz + \
+ XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
#else /* CONFIG_NFS_V4_1 */
#define encode_sequence_maxsz 0
#define decode_sequence_maxsz 0
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int);
#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_reclaim_complete_maxsz)
+#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz +\
+ encode_getdeviceinfo_maxsz)
+#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_getdeviceinfo_maxsz)
+#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_layoutget_maxsz)
+#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_layoutget_maxsz)
const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
compound_encode_hdr_maxsz +
@@ -1737,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
#endif /* CONFIG_NFS_V4_1 */
}
+#ifdef CONFIG_NFS_V4_1
+/*
+ * Append a GETDEVICEINFO operation to the compound.
+ *
+ * Wire order: opcode, fixed-size device id, layout type, maxcount and a
+ * zero-length notification bitmap.  The 16 bytes reserved besides the
+ * device id are the four 32-bit words.  Also bumps hdr->nops and adds the
+ * expected reply size to hdr->replen.
+ */
+static void
+encode_getdeviceinfo(struct xdr_stream *xdr,
+ const struct nfs4_getdeviceinfo_args *args,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
+ *p++ = cpu_to_be32(OP_GETDEVICEINFO);
+ p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
+ NFS4_DEVICEID4_SIZE);
+ *p++ = cpu_to_be32(args->pdev->layout_type);
+ *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
+ *p++ = cpu_to_be32(0); /* bitmap length 0 */
+ hdr->nops++;
+ hdr->replen += decode_getdeviceinfo_maxsz;
+}
+
+/*
+ * Append a LAYOUTGET operation to the compound.
+ *
+ * Wire order: opcode, signal-layout-available flag, layout type, iomode,
+ * offset, length, minlength (three 64-bit values), the layout stateid and
+ * maxcount.  The 44 bytes reserved besides the stateid are five 32-bit
+ * words plus three 64-bit values.  Also bumps hdr->nops and adds the
+ * expected reply size to hdr->replen.
+ */
+static void
+encode_layoutget(struct xdr_stream *xdr,
+ const struct nfs4_layoutget_args *args,
+ struct compound_hdr *hdr)
+{
+ nfs4_stateid stateid;
+ __be32 *p;
+
+ p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_LAYOUTGET);
+ *p++ = cpu_to_be32(0); /* Signal layout available */
+ *p++ = cpu_to_be32(args->type);
+ *p++ = cpu_to_be32(args->range.iomode);
+ p = xdr_encode_hyper(p, args->range.offset);
+ p = xdr_encode_hyper(p, args->range.length);
+ p = xdr_encode_hyper(p, args->minlength);
+ /* The stateid to send is chosen by pnfs_get_layout_stateid() */
+ pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
+ args->ctx->state);
+ p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
+ *p = cpu_to_be32(args->maxcount);
+
+ dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
+ __func__,
+ args->type,
+ args->range.iomode,
+ (unsigned long)args->range.offset,
+ (unsigned long)args->range.length,
+ args->maxcount);
+ hdr->nops++;
+ hdr->replen += decode_layoutget_maxsz;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
/*
* END OF "GENERIC" ENCODE ROUTINES.
*/
@@ -2554,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
return 0;
}
+/*
+ * Encode GETDEVICEINFO request
+ *
+ * Compound: SEQUENCE, GETDEVICEINFO.  The caller's pages (args->pdev->pages)
+ * are attached to the receive buffer with xdr_inline_pages().
+ * Always returns 0.
+ */
+static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
+ struct nfs4_getdeviceinfo_args *args)
+{
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, req, &hdr);
+ encode_sequence(&xdr, &args->seq_args, &hdr);
+ encode_getdeviceinfo(&xdr, args, &hdr);
+
+ /* set up reply kvec. Subtract notification bitmap max size (2)
+ * so that notification bitmap is put in xdr_buf tail */
+ xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
+ args->pdev->pages, args->pdev->pgbase,
+ args->pdev->pglen);
+
+ encode_nops(&hdr);
+ return 0;
+}
+
+/*
+ * Encode LAYOUTGET request
+ *
+ * Compound: SEQUENCE, PUTFH (file handle of args->inode), LAYOUTGET.
+ * Always returns 0.
+ */
+static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
+ struct nfs4_layoutget_args *args)
+{
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, req, &hdr);
+ encode_sequence(&xdr, &args->seq_args, &hdr);
+ encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
+ encode_layoutget(&xdr, args, &hdr);
+ encode_nops(&hdr);
+ return 0;
+}
#endif /* CONFIG_NFS_V4_1 */
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -3978,6 +4103,61 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
}
+/*
+ * Decode the list of layout types and report the first one; a count of
+ * zero is reported as layout type 0.  The whole list is consumed from the
+ * stream.  Currently we only support one layout driver per file system.
+ * Returns 0 on success, -EIO if the stream is too short.
+ */
+static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
+					 uint32_t *layouttype)
+{
+	uint32_t *p;
+	int count;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	count = be32_to_cpup(p);
+
+	/* pNFS is not supported by the underlying file system */
+	if (count == 0) {
+		*layouttype = 0;
+		return 0;
+	}
+	if (count > 1)
+		printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
+			"per filesystem not supported\n", __func__);
+
+	/* Decode and set first layout type, move xdr->p past unused types */
+	p = xdr_inline_decode(xdr, count * 4);
+	if (likely(p)) {
+		*layouttype = be32_to_cpup(p);
+		return 0;
+	}
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ * The type of file system exported.
+ * Note we must ensure that layouttype is set in any non-error case:
+ * it is set to 0 when the FS_LAYOUT_TYPES attribute is absent.
+ * Returns -EIO if a lower-numbered attribute bit is still set in bitmap[1].
+ */
+static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
+				uint32_t *layouttype)
+{
+	int status;
+
+	dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
+	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
+		return -EIO;
+
+	if (!(bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES)) {
+		*layouttype = 0;
+		return 0;
+	}
+
+	status = decode_first_pnfs_layout_type(xdr, layouttype);
+	bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
+	return status;
+}
+
static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
{
__be32 *savep;
@@ -4006,6 +4186,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
if (status != 0)
goto xdr_error;
+ status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
+ if (status != 0)
+ goto xdr_error;
status = verify_attr_len(xdr, savep, attrlen);
xdr_error:
@@ -4772,6 +4955,134 @@ out_overflow:
#endif /* CONFIG_NFS_V4_1 */
}
+#if defined(CONFIG_NFS_V4_1)
+
+/*
+ * Decode a GETDEVICEINFO reply into @pdev.
+ *
+ * On -ETOOSMALL the server's minimum count is stored in pdev->mincount and
+ * the error is returned.  On success pdev->mincount holds the length of the
+ * opaque device address, and the notification bitmap must be all zero.
+ * Returns 0, the op status, -EINVAL on a layout type mismatch, or -EIO on a
+ * short reply or a non-zero notification bitmap.
+ */
+static int decode_getdeviceinfo(struct xdr_stream *xdr,
+ struct pnfs_device *pdev)
+{
+ __be32 *p;
+ uint32_t len, type;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
+ if (status) {
+ if (status == -ETOOSMALL) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ pdev->mincount = be32_to_cpup(p);
+ dprintk("%s: Min count too small. mincnt = %u\n",
+ __func__, pdev->mincount);
+ }
+ return status;
+ }
+
+ /* Two words: layout type and opaque device address length */
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ type = be32_to_cpup(p++);
+ if (type != pdev->layout_type) {
+ dprintk("%s: layout mismatch req: %u pdev: %u\n",
+ __func__, pdev->layout_type, type);
+ return -EINVAL;
+ }
+ /*
+ * Get the length of the opaque device_addr4. xdr_read_pages places
+ * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
+ * and places the remaining xdr data in xdr_buf->tail
+ */
+ pdev->mincount = be32_to_cpup(p);
+ xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
+
+ /* Parse notification bitmap, verifying that it is zero. */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ if (len) {
+ int i;
+
+ p = xdr_inline_decode(xdr, 4 * len);
+ if (unlikely(!p))
+ goto out_overflow;
+ for (i = 0; i < len; i++, p++) {
+ if (be32_to_cpup(p)) {
+ dprintk("%s: notifications not supported\n",
+ __func__);
+ return -EIO;
+ }
+ }
+ }
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+/*
+ * Decode a LAYOUTGET reply into @res.
+ *
+ * Fills in return_on_close, the stateid, and the range, type and opaque body
+ * of the first layout in the returned array; further layouts are ignored.
+ * The opaque body is copied into res->layout.buf.
+ * Returns 0, the op status, -EINVAL for an empty layout array, -ENOMEM when
+ * the layout is larger than a page, or -EIO on a short reply.
+ */
+static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
+ struct nfs4_layoutget_res *res)
+{
+ __be32 *p;
+ int status;
+ u32 layout_count;
+
+ status = decode_op_hdr(xdr, OP_LAYOUTGET);
+ if (status)
+ return status;
+ /* return_on_close and layout count (8 bytes) plus the stateid */
+ p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->return_on_close = be32_to_cpup(p++);
+ p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
+ layout_count = be32_to_cpup(p);
+ if (!layout_count) {
+ dprintk("%s: server responded with empty layout array\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ /* offset and length (8 bytes each), iomode and type (4 bytes each) */
+ p = xdr_inline_decode(xdr, 24);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &res->range.offset);
+ p = xdr_decode_hyper(p, &res->range.length);
+ res->range.iomode = be32_to_cpup(p++);
+ res->type = be32_to_cpup(p++);
+
+ /* p is reused to receive the opaque layout body's location */
+ status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
+ if (unlikely(status))
+ return status;
+
+ dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
+ __func__,
+ (unsigned long)res->range.offset,
+ (unsigned long)res->range.length,
+ res->range.iomode,
+ res->type,
+ res->layout.len);
+
+ /* nfs4_proc_layoutget allocated a single page */
+ if (res->layout.len > PAGE_SIZE)
+ return -ENOMEM;
+ memcpy(res->layout.buf, p, res->layout.len);
+
+ if (layout_count > 1) {
+ /* We only handle a length one array at the moment. Any
+ * further entries are just ignored. Note that this means
+ * the client may see a response that is less than the
+ * minimum it requested.
+ */
+ dprintk("%s: server responded with %d layouts, dropping tail\n",
+ __func__, layout_count);
+ }
+
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
/*
* END OF "GENERIC" DECODE ROUTINES.
*/
@@ -5799,6 +6110,53 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
status = decode_reclaim_complete(&xdr, (void *)NULL);
return status;
}
+
+/*
+ * Decode GETDEVINFO response
+ *
+ * Decodes the compound header, SEQUENCE and GETDEVICEINFO in turn and
+ * returns the first non-zero status, or 0.
+ */
+static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
+				      struct nfs4_getdeviceinfo_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status == 0)
+		status = decode_getdeviceinfo(&xdr, res->pdev);
+	return status;
+}
+
+/*
+ * Decode LAYOUTGET response
+ *
+ * Decodes the compound header, SEQUENCE, PUTFH and LAYOUTGET in turn and
+ * returns the first non-zero status, or 0.
+ */
+static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
+				  struct nfs4_layoutget_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (!status)
+		status = decode_putfh(&xdr);
+	if (!status)
+		status = decode_layoutget(&xdr, rqstp, res);
+	return status;
+}
#endif /* CONFIG_NFS_V4_1 */
__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
@@ -5990,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(SEQUENCE, enc_sequence, dec_sequence),
PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
+ PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
+ PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
#endif /* CONFIG_NFS_V4_1 */
};
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
new file mode 100644
index 000000000000..db773428f95f
--- /dev/null
+++ b/fs/nfs/pnfs.c
@@ -0,0 +1,783 @@
+/*
+ * pNFS functions to call and manage layout drivers.
+ *
+ * Copyright (c) 2002 [year of first publication]
+ * The Regents of the University of Michigan
+ * All Rights Reserved
+ *
+ * Dean Hildebrand <dhildebz@umich.edu>
+ *
+ * Permission is granted to use, copy, create derivative works, and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization. If
+ * the above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation or warranty
+ * of any kind either express or implied, including without limitation
+ * the implied warranties of merchantability, fitness for a particular
+ * purpose, or noninfringement. The Regents of the University of
+ * Michigan shall not be liable for any damages, including special,
+ * indirect, incidental, or consequential damages, with respect to any
+ * claim arising out of or in connection with the use of the software,
+ * even if it has been or is hereafter advised of the possibility of
+ * such damages.
+ */
+
+#include <linux/nfs_fs.h>
+#include "internal.h"
+#include "pnfs.h"
+
+#define NFSDBG_FACILITY NFSDBG_PNFS
+
+/* Locking:
+ *
+ * pnfs_spinlock:
+ * protects pnfs_modules_tbl.
+ */
+static DEFINE_SPINLOCK(pnfs_spinlock);
+
+/*
+ * pnfs_modules_tbl holds all pnfs modules
+ */
+static LIST_HEAD(pnfs_modules_tbl);
+
+/*
+ * Search pnfs_modules_tbl for the layout driver registered under @id.
+ * Returns the matching entry, or NULL if no driver with that id is
+ * registered.  Both callers in this file hold pnfs_spinlock around the call.
+ */
+static struct pnfs_layoutdriver_type *
+find_pnfs_driver_locked(u32 id)
+{
+	struct pnfs_layoutdriver_type *ld;
+	struct pnfs_layoutdriver_type *match = NULL;
+
+	list_for_each_entry(ld, &pnfs_modules_tbl, pnfs_tblid) {
+		if (ld->id == id) {
+			match = ld;
+			break;
+		}
+	}
+	dprintk("%s: Searching for id %u, found %p\n", __func__, id, match);
+	return match;
+}
+
+/*
+ * Look up the layout driver registered under @id, taking pnfs_spinlock
+ * for the duration of the table search.  Returns NULL if none is found.
+ */
+static struct pnfs_layoutdriver_type *
+find_pnfs_driver(u32 id)
+{
+	struct pnfs_layoutdriver_type *ld;
+
+	spin_lock(&pnfs_spinlock);
+	ld = find_pnfs_driver_locked(id);
+	spin_unlock(&pnfs_spinlock);
+
+	return ld;
+}
+
+/*
+ * Detach the layout driver from @nfss.
+ *
+ * If a driver is set, its clear_layoutdriver hook is invoked and the module
+ * reference on its owner is dropped.  nfss->pnfs_curr_ld is set to NULL in
+ * all cases.
+ */
+void
+unset_pnfs_layoutdriver(struct nfs_server *nfss)
+{
+ if (nfss->pnfs_curr_ld) {
+ nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
+ module_put(nfss->pnfs_curr_ld->owner);
+ }
+ nfss->pnfs_curr_ld = NULL;
+}
+
+/*
+ * Try to set the server's pnfs module to the pnfs layout type specified by id.
+ * Currently only one pNFS layout driver per filesystem is supported.
+ *
+ * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
+ *
+ * On success server->pnfs_curr_ld points at the driver and a module
+ * reference is held on its owner.  On any failure server->pnfs_curr_ld is
+ * left NULL and no module reference is held.
+ */
+void
+set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
+{
+ struct pnfs_layoutdriver_type *ld_type = NULL;
+
+ if (id == 0)
+ goto out_no_driver;
+ /* Neither exchange flag set: log it and do not attach a driver */
+ if (!(server->nfs_client->cl_exchange_flags &
+ (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
+ printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
+ id, server->nfs_client->cl_exchange_flags);
+ goto out_no_driver;
+ }
+ ld_type = find_pnfs_driver(id);
+ if (!ld_type) {
+ /* Not registered yet: ask for the module by alias and look again */
+ request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
+ ld_type = find_pnfs_driver(id);
+ if (!ld_type) {
+ dprintk("%s: No pNFS module found for %u.\n",
+ __func__, id);
+ goto out_no_driver;
+ }
+ }
+ if (!try_module_get(ld_type->owner)) {
+ dprintk("%s: Could not grab reference on module\n", __func__);
+ goto out_no_driver;
+ }
+ /* Set before calling the hook; reset to NULL below if the hook fails */
+ server->pnfs_curr_ld = ld_type;
+ if (ld_type->set_layoutdriver(server)) {
+ printk(KERN_ERR
+ "%s: Error initializing mount point for layout driver %u.\n",
+ __func__, id);
+ /* Undo the try_module_get() above */
+ module_put(ld_type->owner);
+ goto out_no_driver;
+ }
+ dprintk("%s: pNFS module for %u set\n", __func__, id);
+ return;
+
+out_no_driver:
+ dprintk("%s: Using NFSv4 I/O\n", __func__);
+ server->pnfs_curr_ld = NULL;
+}
+
+/*
+ * Register a layout driver so set_pnfs_layoutdriver() can find it by id.
+ *
+ * @ld_type: driver description; must have a non-zero id and provide both
+ *           alloc_lseg and free_lseg.
+ *
+ * Returns 0 on success, or -EINVAL if the id is 0, a required callback is
+ * missing, or a driver with the same id is already registered.
+ */
+int
+pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
+{
+	int status = -EINVAL;
+	struct pnfs_layoutdriver_type *tmp;
+
+	if (ld_type->id == 0) {
+		printk(KERN_ERR "%s id 0 is reserved\n", __func__);
+		return status;
+	}
+	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
+		printk(KERN_ERR "%s Layout driver must provide "
+		       "alloc_lseg and free_lseg.\n", __func__);
+		return status;
+	}
+
+	/* Lookup and insertion happen under one lock hold to keep ids unique */
+	spin_lock(&pnfs_spinlock);
+	tmp = find_pnfs_driver_locked(ld_type->id);
+	if (!tmp) {
+		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
+		status = 0;
+		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
+			ld_type->name);
+	} else {
+		/* id is a u32, so print it unsigned like the other messages */
+		printk(KERN_ERR "%s Module with id %u already loaded!\n",
+			__func__, ld_type->id);
+	}
+	spin_unlock(&pnfs_spinlock);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
+
+/*
+ * Remove @ld_type from pnfs_modules_tbl under pnfs_spinlock.
+ * No check is made here that the driver was actually registered.
+ */
+void
+pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
+{
+ dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
+ spin_lock(&pnfs_spinlock);
+ list_del(&ld_type->pnfs_tblid);
+ spin_unlock(&pnfs_spinlock);
+}
+EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
+
+/*
+ * pNFS client layout cache
+ */
+
+/*
+ * Take a reference on @lo.  The refcount is a plain counter, so the caller
+ * must hold lo->inode->i_lock (asserted below).
+ */
+static void
+get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
+{
+ assert_spin_locked(&lo->inode->i_lock);
+ lo->refcount++;
+}
+
+/*
+ * Drop one reference on @lo; the caller must hold lo->inode->i_lock.
+ *
+ * When the last reference goes away the header is detached from its
+ * nfs_inode and freed.  By then it must already be off the per-client
+ * layouts list.
+ */
+static void
+put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
+{
+	assert_spin_locked(&lo->inode->i_lock);
+	BUG_ON(lo->refcount == 0);
+
+	if (--lo->refcount != 0)
+		return;
+
+	dprintk("%s: freeing layout cache %p\n", __func__, lo);
+	BUG_ON(!list_empty(&lo->layouts));
+	NFS_I(lo->inode)->layout = NULL;
+	kfree(lo);
+}
+
+/*
+ * Drop one reference on the layout header attached to @inode, taking
+ * inode->i_lock around the operation.  NFS_I(inode)->layout is passed on
+ * without a NULL check.
+ */
+void
+put_layout_hdr(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ put_layout_hdr_locked(NFS_I(inode)->layout);
+ spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Initialise the generic part of a freshly allocated layout segment:
+ * back-pointer to its layout header, initial kref, and an empty list link.
+ */
+static void
+init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
+{
+	lseg->layout = lo;
+	kref_init(&lseg->kref);
+	INIT_LIST_HEAD(&lseg->fi_list);
+}
+
+/*
+ * kref release function for a layout segment (passed to kref_put() in
+ * put_lseg).
+ *
+ * Called without i_lock held, as the free_lseg call may sleep
+ */
+static void
+destroy_lseg(struct kref *kref)
+{
+ struct pnfs_layout_segment *lseg =
+ container_of(kref, struct pnfs_layout_segment, kref);
+ /* Fetch the inode first: lseg is handed to the driver's free_lseg below */
+ struct inode *ino = lseg->layout->inode;
+
+ dprintk("--> %s\n", __func__);
+ NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
+ put_layout_hdr(ino);
+}
+
+/*
+ * Drop one reference on @lseg; a NULL @lseg is ignored.  The final put
+ * runs destroy_lseg().
+ */
+static void
+put_lseg(struct pnfs_layout_segment *lseg)
+{
+	if (lseg) {
+		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
+			atomic_read(&lseg->kref.refcount));
+		kref_put(&lseg->kref, destroy_lseg);
+	}
+}
+
+/*
+ * Empty the layout @lo: move every segment from lo->segs onto @tmp_list,
+ * unlink @lo from the client's layouts list, and clear the
+ * NFS_LAYOUT_STATEID_SET bit.
+ *
+ * The caller must hold lo->inode->i_lock.  The segments are only moved
+ * here; the caller releases them later via pnfs_free_lseg_list().
+ */
+static void
+pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
+{
+ struct pnfs_layout_segment *lseg, *next;
+ struct nfs_client *clp;
+
+ dprintk("%s:Begin lo %p\n", __func__, lo);
+
+ assert_spin_locked(&lo->inode->i_lock);
+ list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
+ dprintk("%s: freeing lseg %p\n", __func__, lseg);
+ list_move(&lseg->fi_list, tmp_list);
+ }
+ clp = NFS_SERVER(lo->inode)->nfs_client;
+ /* cl_lock nests inside i_lock here */
+ spin_lock(&clp->cl_lock);
+ /* List does not take a reference, so no need for put here */
+ list_del_init(&lo->layouts);
+ spin_unlock(&clp->cl_lock);
+ /* The bit is cleared under the same seqlock that guards the stateid */
+ write_seqlock(&lo->seqlock);
+ clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+ write_sequnlock(&lo->seqlock);
+
+ dprintk("%s:Return\n", __func__);
+}
+
+/*
+ * Unlink every layout segment on @tmp_list and drop one reference on each.
+ * pnfs_destroy_layout() calls this after releasing i_lock.
+ */
+static void
+pnfs_free_lseg_list(struct list_head *tmp_list)
+{
+	struct pnfs_layout_segment *lseg, *next;
+
+	list_for_each_entry_safe(lseg, next, tmp_list, fi_list) {
+		dprintk("%s calling put_lseg on %p\n", __func__, lseg);
+		list_del(&lseg->fi_list);
+		put_lseg(lseg);
+	}
+}
+
+/*
+ * Tear down the layout attached to @nfsi, if any.
+ *
+ * Under i_lock the segments are moved to a private list and the header's
+ * initial reference is dropped.  The segments are then released after
+ * i_lock has been dropped.
+ */
+void
+pnfs_destroy_layout(struct nfs_inode *nfsi)
+{
+ struct pnfs_layout_hdr *lo;
+ LIST_HEAD(tmp_list);
+
+ spin_lock(&nfsi->vfs_inode.i_lock);
+ lo = nfsi->layout;
+ if (lo) {
+ pnfs_clear_lseg_list(lo, &tmp_list);
+ /* Matched by refcount set to 1 in alloc_init_layout_hdr */
+ put_layout_hdr_locked(lo);
+ }
+ spin_unlock(&nfsi->vfs_inode.i_lock);
+ /* Done outside i_lock: releasing a segment may sleep (see destroy_lseg) */
+ pnfs_free_lseg_list(&tmp_list);
+}
+
+/*
+ * Called by the state manager to remove all layouts established under an
+ * expired lease.
+ *
+ * The client's cl_layouts list is spliced onto a private list under
+ * cl_lock, then each layout is destroyed in turn.
+ */
+void
+pnfs_destroy_all_layouts(struct nfs_client *clp)
+{
+ struct pnfs_layout_hdr *lo;
+ LIST_HEAD(tmp_list);
+
+ spin_lock(&clp->cl_lock);
+ list_splice_init(&clp->cl_layouts, &tmp_list);
+ spin_unlock(&clp->cl_lock);
+
+ /*
+ * The loop makes progress because pnfs_destroy_layout() unlinks
+ * lo->layouts (via pnfs_clear_lseg_list) from whatever list it is on.
+ */
+ while (!list_empty(&tmp_list)) {
+ lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
+ layouts);
+ dprintk("%s freeing layout for inode %lu\n", __func__,
+ lo->inode->i_ino);
+ pnfs_destroy_layout(NFS_I(lo->inode));
+ }
+}
+
+/*
+ * Update lo->stateid with @new if @new is more recent.
+ *
+ * @new replaces the stored stateid when no layout stateid has been set
+ * yet, when the "other" field differs (lo->stateid could be the open
+ * stateid, in which case we just use what we are given), or when @new
+ * carries a later seqid.  The seqid comparison is done on the signed
+ * difference.  The update is made under lo->seqlock.
+ */
+static void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+			const nfs4_stateid *new)
+{
+	nfs4_stateid *cur = &lo->stateid;
+	bool replace;
+
+	write_seqlock(&lo->seqlock);
+	if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
+		replace = true;
+	} else if (memcmp(cur->stateid.other, new->stateid.other,
+			  sizeof(new->stateid.other)) != 0) {
+		replace = true;
+	} else {
+		u32 cur_seq = be32_to_cpu(cur->stateid.seqid);
+		u32 new_seq = be32_to_cpu(new->stateid.seqid);
+
+		replace = (int)(new_seq - cur_seq) > 0;
+	}
+	if (replace)
+		memcpy(&cur->stateid, &new->stateid, sizeof(new->stateid));
+	write_sequnlock(&lo->seqlock);
+}
+
+/*
+ * Seed the layout stateid of @lo from the open stateid in @state and mark
+ * it valid by setting NFS_LAYOUT_STATEID_SET.
+ *
+ * lo->seqlock is held for writing throughout.  The open stateid is copied
+ * with a read-side retry loop on state->seqlock.
+ */
+static void
+pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
+ struct nfs4_state *state)
+{
+ int seq;
+
+ dprintk("--> %s\n", __func__);
+ write_seqlock(&lo->seqlock);
+ do {
+ seq = read_seqbegin(&state->seqlock);
+ memcpy(lo->stateid.data, state->stateid.data,
+ sizeof(state->stateid.data));
+ } while (read_seqretry(&state->seqlock, seq));
+ set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+ write_sequnlock(&lo->seqlock);
+ dprintk("<-- %s\n", __func__);
+}
+
+/*
+ * Copy the current layout stateid of @lo into @dst.
+ *
+ * If no layout stateid has been set yet, it is first seeded from
+ * @open_state.  That path writes under lo->seqlock, so the read loop goes
+ * round again and copies the seeded value into @dst.
+ */
+void
+pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state)
+{
+ int seq;
+
+ dprintk("--> %s\n", __func__);
+ do {
+ seq = read_seqbegin(&lo->seqlock);
+ if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
+ /* This will trigger retry of the read */
+ pnfs_layout_from_open_stateid(lo, open_state);
+ } else
+ memcpy(dst->data, lo->stateid.data,
+ sizeof(lo->stateid.data));
+ } while (read_seqretry(&lo->seqlock, seq));
+ dprintk("<-- %s\n", __func__);
+}
+
+/*
+ * Get layout from server.
+ * For now, assume that whole file layouts are requested:
+ *   arg->offset: 0
+ *   arg->length: all ones
+ *
+ * Returns the layout segment stored through lgp->lsegpp, or NULL if none
+ * was obtained.  On NULL after the request was sent, the per-iomode
+ * "failed" bit is set in lo->state so pnfs_update_layout() stops retrying.
+ *
+ * The caller took a reference on @lo before calling.  It is dropped here
+ * only on allocation failure.
+ * NOTE(review): on the other paths the reference, and lgp itself, are
+ * presumably released by nfs4_layoutget_release (see the comment in
+ * pnfs_update_layout) - confirm in nfs4proc.c.
+ */
+static struct pnfs_layout_segment *
+send_layoutget(struct pnfs_layout_hdr *lo,
+ struct nfs_open_context *ctx,
+ u32 iomode)
+{
+ struct inode *ino = lo->inode;
+ struct nfs_server *server = NFS_SERVER(ino);
+ struct nfs4_layoutget *lgp;
+ struct pnfs_layout_segment *lseg = NULL;
+
+ dprintk("--> %s\n", __func__);
+
+ BUG_ON(ctx == NULL);
+ lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
+ if (lgp == NULL) {
+ put_layout_hdr(lo->inode);
+ return NULL;
+ }
+ lgp->args.minlength = NFS4_MAX_UINT64;
+ lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+ lgp->args.range.iomode = iomode;
+ lgp->args.range.offset = 0;
+ lgp->args.range.length = NFS4_MAX_UINT64;
+ lgp->args.type = server->pnfs_curr_ld->id;
+ lgp->args.inode = ino;
+ lgp->args.ctx = get_nfs_open_context(ctx);
+ /* pnfs_layout_process() stores the new segment through this pointer */
+ lgp->lsegpp = &lseg;
+
+ /* Synchronously retrieve layout information from server and
+ * store in lseg.
+ */
+ nfs4_proc_layoutget(lgp);
+ if (!lseg) {
+ /* remember that LAYOUTGET failed and suspend trying */
+ set_bit(lo_fail_bit(iomode), &lo->state);
+ }
+ return lseg;
+}
+
+/*
+ * Compare two layout iomodes for sorting into the layout cache.
+ * We want to preferentially return RW over RO layouts, so ensure those
+ * are seen first.
+ *
+ * Returns a positive value when only iomode2 is IOMODE_READ, a negative
+ * value when only iomode1 is IOMODE_READ, and 0 otherwise.
+ */
+static s64
+cmp_layout(u32 iomode1, u32 iomode2)
+{
+	int first_is_read = (iomode1 == IOMODE_READ);
+	int second_is_read = (iomode2 == IOMODE_READ);
+
+	/* read > read/write */
+	return second_is_read - first_is_read;
+}
+
+/*
+ * Insert @lseg into lo->segs at the position chosen by cmp_layout() and
+ * take a reference on @lo on behalf of the segment.
+ *
+ * The caller must hold lo->inode->i_lock.  If this is the first segment,
+ * @lo is also added to the client's cl_layouts list under cl_lock.
+ */
+static void
+pnfs_insert_layout(struct pnfs_layout_hdr *lo,
+ struct pnfs_layout_segment *lseg)
+{
+ struct pnfs_layout_segment *lp;
+ int found = 0;
+
+ dprintk("%s:Begin\n", __func__);
+
+ assert_spin_locked(&lo->inode->i_lock);
+ if (list_empty(&lo->segs)) {
+ struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
+
+ spin_lock(&clp->cl_lock);
+ BUG_ON(!list_empty(&lo->layouts));
+ list_add_tail(&lo->layouts, &clp->cl_layouts);
+ spin_unlock(&clp->cl_lock);
+ }
+ list_for_each_entry(lp, &lo->segs, fi_list) {
+ /* Skip entries that cmp_layout() orders ahead of the new segment */
+ if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+ continue;
+ /* list_add_tail() on lp's node links lseg immediately before lp */
+ list_add_tail(&lseg->fi_list, &lp->fi_list);
+ dprintk("%s: inserted lseg %p "
+ "iomode %d offset %llu length %llu before "
+ "lp %p iomode %d offset %llu length %llu\n",
+ __func__, lseg, lseg->range.iomode,
+ lseg->range.offset, lseg->range.length,
+ lp, lp->range.iomode, lp->range.offset,
+ lp->range.length);
+ found = 1;
+ break;
+ }
+ if (!found) {
+ list_add_tail(&lseg->fi_list, &lo->segs);
+ dprintk("%s: inserted lseg %p "
+ "iomode %d offset %llu length %llu at tail\n",
+ __func__, lseg, lseg->range.iomode,
+ lseg->range.offset, lseg->range.length);
+ }
+ /* Dropped again by put_layout_hdr() in destroy_lseg */
+ get_layout_hdr_locked(lo);
+
+ dprintk("%s:Return\n", __func__);
+}
+
+/*
+ * Allocate a zeroed layout header for @ino.
+ *
+ * The new header starts with a refcount of 1, empty list heads and an
+ * initialised seqlock.  Returns NULL if the allocation fails.
+ */
+static struct pnfs_layout_hdr *
+alloc_init_layout_hdr(struct inode *ino)
+{
+	struct pnfs_layout_hdr *hdr;
+
+	hdr = kzalloc(sizeof(*hdr), GFP_KERNEL);
+	if (hdr == NULL)
+		return NULL;
+
+	hdr->inode = ino;
+	hdr->refcount = 1;
+	seqlock_init(&hdr->seqlock);
+	INIT_LIST_HEAD(&hdr->segs);
+	INIT_LIST_HEAD(&hdr->layouts);
+	return hdr;
+}
+
+/*
+ * Return the layout header attached to @ino, allocating one if needed.
+ *
+ * Must be called with ino->i_lock held.  The lock is dropped around the
+ * allocation and retaken, so another thread may attach a header in the
+ * meantime; in that case the new allocation is discarded.
+ * Returns NULL only if allocation failed and nobody else attached one.
+ */
+static struct pnfs_layout_hdr *
+pnfs_find_alloc_layout(struct inode *ino)
+{
+ struct nfs_inode *nfsi = NFS_I(ino);
+ struct pnfs_layout_hdr *new = NULL;
+
+ dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
+
+ assert_spin_locked(&ino->i_lock);
+ if (nfsi->layout)
+ return nfsi->layout;
+
+ /* Allocation uses GFP_KERNEL, so do it outside the spinlock */
+ spin_unlock(&ino->i_lock);
+ new = alloc_init_layout_hdr(ino);
+ spin_lock(&ino->i_lock);
+
+ if (likely(nfsi->layout == NULL)) /* Won the race? */
+ nfsi->layout = new;
+ else
+ kfree(new);
+ return nfsi->layout;
+}
+
+/*
+ * Decide whether a cached segment can satisfy a request for @iomode.
+ *
+ * iomode matching rules:
+ * iomode	lseg	match
+ * -----	-----	-----
+ * ANY		READ	true
+ * ANY		RW	true
+ * RW		READ	false
+ * RW		RW	true
+ * READ		READ	true
+ * READ		RW	true
+ *
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int
+is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
+{
+	/* Only a RW request is picky: it needs a RW segment */
+	if (iomode == IOMODE_RW)
+		return lseg->range.iomode == IOMODE_RW;
+	return 1;
+}
+
+/*
+ * lookup range in layout
+ *
+ * Walk lo->segs and return the first segment that matches @iomode per
+ * is_matching_lseg(), or NULL if none does.  The caller must hold
+ * lo->inode->i_lock.  No reference is taken on the returned segment.
+ */
+static struct pnfs_layout_segment *
+pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+{
+ struct pnfs_layout_segment *lseg, *ret = NULL;
+
+ dprintk("%s:Begin\n", __func__);
+
+ assert_spin_locked(&lo->inode->i_lock);
+ list_for_each_entry(lseg, &lo->segs, fi_list) {
+ if (is_matching_lseg(lseg, iomode)) {
+ ret = lseg;
+ break;
+ }
+ /* Stop early once cmp_layout() reports a positive result */
+ if (cmp_layout(iomode, lseg->range.iomode) > 0)
+ break;
+ }
+
+ dprintk("%s:Return lseg %p ref %d\n",
+ __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+ return ret;
+}
+
+/*
+ * Layout segment is retrieved from the server if not cached.
+ *
+ * @ino:    inode whose layout is wanted
+ * @ctx:    open context handed to LAYOUTGET
+ * @iomode: requested I/O mode
+ *
+ * Returns a matching layout segment, or NULL when pNFS is not enabled for
+ * this mount, the layout header cannot be allocated, a previous LAYOUTGET
+ * for this iomode already failed, or the server did not return a layout.
+ *
+ * NOTE(review): a segment found in the cache is returned without taking
+ * an additional kref here.
+ */
+struct pnfs_layout_segment *
+pnfs_update_layout(struct inode *ino,
+		   struct nfs_open_context *ctx,
+		   enum pnfs_iomode iomode)
+{
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_hdr *lo;
+	struct pnfs_layout_segment *lseg = NULL;
+
+	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
+		return NULL;
+	spin_lock(&ino->i_lock);
+	lo = pnfs_find_alloc_layout(ino);
+	if (lo == NULL) {
+		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
+		goto out_unlock;
+	}
+
+	/* Check to see if the layout for the given range already exists */
+	lseg = pnfs_has_layout(lo, iomode);
+	if (lseg) {
+		dprintk("%s: Using cached lseg %p for iomode %d)\n",
+			__func__, lseg, iomode);
+		goto out_unlock;
+	}
+
+	/* if LAYOUTGET already failed once we don't try again */
+	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
+		goto out_unlock;
+
+	get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
+	spin_unlock(&ino->i_lock);
+
+	lseg = send_layoutget(lo, ctx, iomode);
+out:
+	/*
+	 * nfsi->layout is NULL when the header allocation failed above, so
+	 * it must not be dereferenced unconditionally.
+	 */
+	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
+		nfsi->layout ? nfsi->layout->state : 0UL, lseg);
+	return lseg;
+out_unlock:
+	spin_unlock(&ino->i_lock);
+	goto out;
+}
+
+/*
+ * Turn a decoded LAYOUTGET result into a cached layout segment.
+ *
+ * The layout driver's alloc_lseg hook builds the segment from lgp->res.
+ * Under i_lock the segment is initialised, published through lgp->lsegpp,
+ * inserted into the layout's segment list, and the layout stateid is
+ * updated from the reply.
+ *
+ * Returns 0 on success, -ENOMEM if the driver returned NULL, or the error
+ * encoded in the pointer the driver returned.
+ */
+int
+pnfs_layout_process(struct nfs4_layoutget *lgp)
+{
+	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
+	struct nfs4_layoutget_res *res = &lgp->res;
+	struct inode *ino = lo->inode;
+	struct pnfs_layout_segment *lseg;
+	int status;
+
+	/* Inject layout blob into I/O device driver */
+	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
+	if (!lseg || IS_ERR(lseg)) {
+		status = lseg ? PTR_ERR(lseg) : -ENOMEM;
+		dprintk("%s: Could not allocate layout: error %d\n",
+			__func__, status);
+		return status;
+	}
+
+	spin_lock(&ino->i_lock);
+	init_lseg(lo, lseg);
+	lseg->range = res->range;
+	*lgp->lsegpp = lseg;
+	pnfs_insert_layout(lo, lseg);
+
+	/* Done processing layoutget. Set the layout stateid */
+	pnfs_set_layout_stateid(lo, &res->stateid);
+	spin_unlock(&ino->i_lock);
+
+	return 0;
+}
+
+/*
+ * Device ID cache. Currently supports one layout type per struct nfs_client.
+ * Add layout type to the lookup key to expand to support multiple types.
+ *
+ * Attach a device ID cache to @clp, or take another reference on the one
+ * already attached.  @free_callback is recorded only when a new cache is
+ * installed.  Returns 0 on success or -ENOMEM if allocation fails.
+ */
+int
+pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
+ void (*free_callback)(struct pnfs_deviceid_node *))
+{
+ struct pnfs_deviceid_cache *c;
+
+ /* Allocate up front, before cl_lock is taken; freed below if not needed */
+ c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+ spin_lock(&clp->cl_lock);
+ if (clp->cl_devid_cache != NULL) {
+ atomic_inc(&clp->cl_devid_cache->dc_ref);
+ dprintk("%s [kref [%d]]\n", __func__,
+ atomic_read(&clp->cl_devid_cache->dc_ref));
+ kfree(c);
+ } else {
+ /* kzalloc initializes hlists */
+ spin_lock_init(&c->dc_lock);
+ atomic_set(&c->dc_ref, 1);
+ c->dc_free_callback = free_callback;
+ clp->cl_devid_cache = c;
+ dprintk("%s [new]\n", __func__);
+ }
+ spin_unlock(&clp->cl_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
+
+/*
+ * Called from pnfs_layoutdriver_type->free_lseg
+ * last layout segment reference frees deviceid
+ *
+ * Drops one reference on @devid.  When the count reaches zero the node is
+ * unhashed from @c under dc_lock and, after synchronize_rcu(), handed to
+ * the cache's free callback.
+ */
+void
+pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
+ struct pnfs_deviceid_node *devid)
+{
+ struct nfs4_deviceid *id = &devid->de_id;
+ struct pnfs_deviceid_node *d;
+ struct hlist_node *n;
+ long h = nfs4_deviceid_hash(id);
+
+ dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
+ /* Returns with dc_lock held only if this was the last reference */
+ if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
+ return;
+
+ hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
+ if (!memcmp(&d->de_id, id, sizeof(*id))) {
+ hlist_del_rcu(&d->de_node);
+ spin_unlock(&c->dc_lock);
+ /* Wait for RCU readers (pnfs_find_get_deviceid) before freeing */
+ synchronize_rcu();
+ c->dc_free_callback(devid);
+ return;
+ }
+ spin_unlock(&c->dc_lock);
+ /* Why wasn't it found in the list? */
+ BUG();
+}
+EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
+
+/*
+ * Find and reference a deviceid.
+ *
+ * Searches the hash bucket for @id under rcu_read_lock().  Returns the
+ * node with its reference count raised, or NULL if there is no node with
+ * that id or the matching node's count has already dropped to zero.
+ */
+struct pnfs_deviceid_node *
+pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
+{
+	struct pnfs_deviceid_node *d;
+	struct pnfs_deviceid_node *found = NULL;
+	struct hlist_node *n;
+	long hash = nfs4_deviceid_hash(id);
+
+	dprintk("--> %s hash %ld\n", __func__, hash);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
+		if (memcmp(&d->de_id, id, sizeof(*id)) != 0)
+			continue;
+		/* First id match ends the search whether or not the ref is won */
+		if (atomic_inc_not_zero(&d->de_ref))
+			found = d;
+		break;
+	}
+	rcu_read_unlock();
+	return found;
+}
+EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
+
+/*
+ * Add a deviceid to the cache.
+ * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
+ *
+ * Returns the node that ends up in the cache: the existing one, with an
+ * extra reference taken, after @new has been passed to the free callback;
+ * or @new itself with its reference count set to 1.
+ */
+struct pnfs_deviceid_node *
+pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
+{
+ struct pnfs_deviceid_node *d;
+ long hash = nfs4_deviceid_hash(&new->de_id);
+
+ dprintk("--> %s hash %ld\n", __func__, hash);
+ /* dc_lock is held across the lookup and the insert */
+ spin_lock(&c->dc_lock);
+ d = pnfs_find_get_deviceid(c, &new->de_id);
+ if (d) {
+ spin_unlock(&c->dc_lock);
+ dprintk("%s [discard]\n", __func__);
+ c->dc_free_callback(new);
+ return d;
+ }
+ INIT_HLIST_NODE(&new->de_node);
+ atomic_set(&new->de_ref, 1);
+ hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
+ spin_unlock(&c->dc_lock);
+ dprintk("%s [new]\n", __func__);
+ return new;
+}
+EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
+
+/*
+ * Drop one reference on the device ID cache attached to @clp.
+ *
+ * On the last reference the cache, which must be empty by then, is
+ * detached from the client under cl_lock and freed.
+ */
+void
+pnfs_put_deviceid_cache(struct nfs_client *clp)
+{
+	struct pnfs_deviceid_cache *cache = clp->cl_devid_cache;
+	int i;
+
+	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
+	if (!atomic_dec_and_lock(&cache->dc_ref, &clp->cl_lock))
+		return;
+
+	/* Verify cache is empty */
+	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
+		BUG_ON(!hlist_empty(&cache->dc_deviceids[i]));
+	clp->cl_devid_cache = NULL;
+	spin_unlock(&clp->cl_lock);
+	kfree(cache);
+}
+EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
new file mode 100644
index 000000000000..e12367d50489
--- /dev/null
+++ b/fs/nfs/pnfs.h
@@ -0,0 +1,189 @@
+/*
+ * pNFS client data structures.
+ *
+ * Copyright (c) 2002
+ * The Regents of the University of Michigan
+ * All Rights Reserved
+ *
+ * Dean Hildebrand <dhildebz@umich.edu>
+ *
+ * Permission is granted to use, copy, create derivative works, and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization. If
+ * the above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation or warranty
+ * of any kind either express or implied, including without limitation
+ * the implied warranties of merchantability, fitness for a particular
+ * purpose, or noninfringement. The Regents of the University of
+ * Michigan shall not be liable for any damages, including special,
+ * indirect, incidental, or consequential damages, with respect to any
+ * claim arising out of or in connection with the use of the software,
+ * even if it has been or is hereafter advised of the possibility of
+ * such damages.
+ */
+
+#ifndef FS_NFS_PNFS_H
+#define FS_NFS_PNFS_H
+
+/*
+ * Generic part of a layout segment.  Segments are created by the layout
+ * driver's alloc_lseg hook and initialised by init_lseg() in pnfs.c.
+ */
+struct pnfs_layout_segment {
+ /* link in pnfs_layout_hdr.segs */
+ struct list_head fi_list;
+ /* iomode/offset/length this segment covers */
+ struct pnfs_layout_range range;
+ /* reference count; the final put runs destroy_lseg() */
+ struct kref kref;
+ /* back-pointer to the owning layout header */
+ struct pnfs_layout_hdr *layout;
+};
+
+#ifdef CONFIG_NFS_V4_1
+
+#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
+
+/* Bit numbers for pnfs_layout_hdr.state (used with set_bit/test_bit) */
+enum {
+ NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
+ NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
+ NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
+};
+
+/*
+ * Per-layout driver specific registration structure.
+ * Registered with pnfs_register_layoutdriver(), which rejects id 0 and
+ * requires both alloc_lseg and free_lseg.
+ */
+struct pnfs_layoutdriver_type {
+ /* link in the pnfs_modules_tbl list in pnfs.c */
+ struct list_head pnfs_tblid;
+ /* layout type this driver handles; lookup key in the table */
+ const u32 id;
+ const char *name;
+ /* module pinned by try_module_get() while a server uses the driver */
+ struct module *owner;
+ /* called from set_pnfs_layoutdriver(); non-zero return means failure */
+ int (*set_layoutdriver) (struct nfs_server *);
+ /* called from unset_pnfs_layoutdriver(); its return value is ignored there */
+ int (*clear_layoutdriver) (struct nfs_server *);
+ /* build a segment from a LAYOUTGET result; may return NULL or ERR_PTR */
+ struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
+ /* release a segment; may sleep, so it is called without i_lock */
+ void (*free_lseg) (struct pnfs_layout_segment *lseg);
+};
+
+/*
+ * Per-inode layout state, reachable through NFS_I(inode)->layout.
+ */
+struct pnfs_layout_hdr {
+ /* plain counter; modified only with inode->i_lock held */
+ unsigned long refcount;
+ struct list_head layouts; /* other client layouts */
+ struct list_head segs; /* layout segments list */
+ seqlock_t seqlock; /* Protects the stateid */
+ nfs4_stateid stateid;
+ /* NFS_LAYOUT_* bits */
+ unsigned long state;
+ struct inode *inode;
+};
+
+/*
+ * Argument/result container passed to nfs4_proc_getdeviceinfo().
+ * NOTE(review): the field meanings are not established in this header.
+ * pages/area/pgbase/pglen presumably describe the buffer that receives
+ * the device address blob - confirm against the GETDEVICEINFO
+ * encode/decode routines.
+ */
+struct pnfs_device {
+ struct nfs4_deviceid dev_id;
+ unsigned int layout_type;
+ unsigned int mincount;
+ struct page **pages;
+ void *area;
+ unsigned int pgbase;
+ unsigned int pglen;
+};
+
+/*
+ * Device ID RCU cache. A device ID is unique per client ID and layout type.
+ */
+#define NFS4_DEVICE_ID_HASH_BITS 5
+#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
+#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
+
+/*
+ * Hash a device ID into a bucket index for pnfs_deviceid_cache.
+ *
+ * Folds the NFS4_DEVICEID4_SIZE bytes of id->data with a multiply-by-37
+ * polynomial and masks the result to [0, NFS4_DEVICE_ID_HASH_SIZE).
+ */
+static inline u32
+nfs4_deviceid_hash(struct nfs4_deviceid *id)
+{
+	unsigned char *bytes = (unsigned char *)id->data;
+	unsigned int i;
+	u32 hash = 0;
+
+	for (i = 0; i < NFS4_DEVICEID4_SIZE; i++)
+		hash = hash * 37 + bytes[i];
+
+	return hash & NFS4_DEVICE_ID_HASH_MASK;
+}
+
+/* One cached device ID; an entry in a pnfs_deviceid_cache hash bucket */
+struct pnfs_deviceid_node {
+ /* link in pnfs_deviceid_cache.dc_deviceids[hash] */
+ struct hlist_node de_node;
+ /* lookup key */
+ struct nfs4_deviceid de_id;
+ /* reference count; the node is unhashed and freed when it reaches zero */
+ atomic_t de_ref;
+};
+
+/* Device ID hash table, attached to a client as nfs_client.cl_devid_cache */
+struct pnfs_deviceid_cache {
+ /* held while nodes are added to or removed from the hash table */
+ spinlock_t dc_lock;
+ /* number of users of the cache; it is freed when this drops to zero */
+ atomic_t dc_ref;
+ /* called to free a node that was removed from, or never added to, the cache */
+ void (*dc_free_callback)(struct pnfs_deviceid_node *);
+ struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
+};
+
+extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
+ void (*free_callback)(struct pnfs_deviceid_node *));
+extern void pnfs_put_deviceid_cache(struct nfs_client *);
+extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
+ struct pnfs_deviceid_cache *,
+ struct nfs4_deviceid *);
+extern struct pnfs_deviceid_node *pnfs_add_deviceid(
+ struct pnfs_deviceid_cache *,
+ struct pnfs_deviceid_node *);
+extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
+ struct pnfs_deviceid_node *devid);
+
+extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
+extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
+
+/* nfs4proc.c */
+extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
+ struct pnfs_device *dev);
+extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
+
+/* pnfs.c */
+struct pnfs_layout_segment *
+pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
+ enum pnfs_iomode access_type);
+void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
+void unset_pnfs_layoutdriver(struct nfs_server *);
+int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_destroy_layout(struct nfs_inode *);
+void pnfs_destroy_all_layouts(struct nfs_client *);
+void put_layout_hdr(struct inode *inode);
+void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state);
+
+
+/*
+ * Map an iomode to the pnfs_layout_hdr.state bit that records a failed
+ * LAYOUTGET: IOMODE_RW uses the RW bit, every other iomode the RO bit.
+ */
+static inline int lo_fail_bit(u32 iomode)
+{
+	if (iomode == IOMODE_RW)
+		return NFS_LAYOUT_RW_FAILED;
+	return NFS_LAYOUT_RO_FAILED;
+}
+
+/*
+ * Return true if a layout driver is being used for this mountpoint,
+ * i.e. nfss->pnfs_curr_ld is set.
+ */
+static inline int pnfs_enabled_sb(struct nfs_server *nfss)
+{
+	return nfss->pnfs_curr_ld ? 1 : 0;
+}
+
+#else /* CONFIG_NFS_V4_1 */
+
+/*
+ * No-op stand-ins used when CONFIG_NFS_V4_1 is not set, so callers need
+ * no #ifdefs.  pnfs_update_layout() always reports "no layout" (NULL).
+ */
+static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
+{
+}
+
+static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
+{
+}
+
+static inline struct pnfs_layout_segment *
+pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
+ enum pnfs_iomode access_type)
+{
+ return NULL;
+}
+
+static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
+{
+}
+
+static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
+{
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 79859c81a943..e4b62c6f5a6e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -25,6 +25,7 @@
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -120,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
+ pnfs_update_layout(inode, ctx, IOMODE_READ);
new = nfs_create_request(ctx, inode, page, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
@@ -624,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
+ pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
if (rsize < PAGE_CACHE_SIZE)
nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
else
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 7cf4ddafb4ab..31a78fce4732 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -29,6 +29,18 @@ config NFSD
If unsure, say N.
+config NFSD_DEPRECATED
+ bool "Include support for deprecated syscall interface to NFSD"
+ depends on NFSD
+ default y
+ help
+ The syscall interface to nfsd was obsoleted in 2.6.0 by a new
+ filesystem based interface. The old interface is due for removal
+ in 2.6.40. If you wish to remove the interface before then
+ say N.
+
+ If unsure, say Y.
+
config NFSD_V2_ACL
bool
depends on NFSD
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c2a4f71d87dd..c0fcb7ab7f6d 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -28,9 +28,6 @@
typedef struct auth_domain svc_client;
typedef struct svc_export svc_export;
-static void exp_do_unexport(svc_export *unexp);
-static int exp_verify_string(char *cp, int max);
-
/*
* We have two caches.
* One maps client+vfsmnt+dentry to export options - the export map
@@ -802,6 +799,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
return ek;
}
+#ifdef CONFIG_NFSD_DEPRECATED
static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
struct svc_export *exp)
{
@@ -852,6 +850,7 @@ exp_get_fsid_key(svc_client *clp, int fsid)
return exp_find_key(clp, FSID_NUM, fsidv, NULL);
}
+#endif
static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
struct cache_req *reqp)
@@ -893,6 +892,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path)
return exp;
}
+#ifdef CONFIG_NFSD_DEPRECATED
/*
* Hashtable locking. Write locks are placed only by user processes
* wanting to modify export information.
@@ -925,6 +925,19 @@ exp_writeunlock(void)
{
up_write(&hash_sem);
}
+#else
+
+/* hash_sem not needed once deprecated interface is removed */
+void exp_readlock(void) {}
+static inline void exp_writelock(void){}
+void exp_readunlock(void) {}
+static inline void exp_writeunlock(void){}
+
+#endif
+
+#ifdef CONFIG_NFSD_DEPRECATED
+static void exp_do_unexport(svc_export *unexp);
+static int exp_verify_string(char *cp, int max);
static void exp_fsid_unhash(struct svc_export *exp)
{
@@ -935,10 +948,9 @@ static void exp_fsid_unhash(struct svc_export *exp)
ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
if (!IS_ERR(ek)) {
- ek->h.expiry_time = get_seconds()-1;
+ sunrpc_invalidate(&ek->h, &svc_expkey_cache);
cache_put(&ek->h, &svc_expkey_cache);
}
- svc_expkey_cache.nextcheck = get_seconds();
}
static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
@@ -973,10 +985,9 @@ static void exp_unhash(struct svc_export *exp)
ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
if (!IS_ERR(ek)) {
- ek->h.expiry_time = get_seconds()-1;
+ sunrpc_invalidate(&ek->h, &svc_expkey_cache);
cache_put(&ek->h, &svc_expkey_cache);
}
- svc_expkey_cache.nextcheck = get_seconds();
}
/*
@@ -1097,8 +1108,7 @@ out:
static void
exp_do_unexport(svc_export *unexp)
{
- unexp->h.expiry_time = get_seconds()-1;
- svc_export_cache.nextcheck = get_seconds();
+ sunrpc_invalidate(&unexp->h, &svc_export_cache);
exp_unhash(unexp);
exp_fsid_unhash(unexp);
}
@@ -1150,6 +1160,7 @@ out_unlock:
exp_writeunlock();
return err;
}
+#endif /* CONFIG_NFSD_DEPRECATED */
/*
* Obtain the root fh on behalf of a client.
@@ -1459,25 +1470,43 @@ static void show_secinfo_flags(struct seq_file *m, int flags)
show_expflags(m, flags, NFSEXP_SECINFO_FLAGS);
}
+/* Compare two flag words, looking only at the NFSEXP_SECINFO_FLAGS bits. */
+static bool secinfo_flags_equal(int f, int g)
+{
+	return (f & NFSEXP_SECINFO_FLAGS) == (g & NFSEXP_SECINFO_FLAGS);
+}
+
+/*
+ * Print one run of flavors that share the same secinfo flags.
+ *
+ * Emits ",sec=<flavor>" for the entry at *fp, then ":<flavor>" for each
+ * following entry (up to @end) whose secinfo flags equal the first one's.
+ * *fp is advanced past the run.  Returns the flags of the first entry.
+ * The caller must ensure *fp != end on entry.
+ */
+static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end)
+{
+	struct exp_flavor_info *cur = *fp;
+	int flags = cur->flags;
+
+	seq_printf(m, ",sec=%d", cur->pseudoflavor);
+	for (cur++; cur != end && secinfo_flags_equal(flags, cur->flags); cur++)
+		seq_printf(m, ":%d", cur->pseudoflavor);
+
+	*fp = cur;
+	return flags;
+}
+
+/*
+ * Print the export's security flavors to m as a series of ",sec=a:b:..."
+ * runs, one run per group of consecutive flavors with matching secinfo
+ * flags.  The flags of the first run are printed only when their secinfo
+ * bits differ from the export-wide ex_flags; every later run is always
+ * followed by its flags.  Prints nothing when the export has no flavors.
+ */
static void show_secinfo(struct seq_file *m, struct svc_export *exp)
{
struct exp_flavor_info *f;
struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
- int lastflags = 0, first = 0;
+ int flags;
if (exp->ex_nflavors == 0)
return;
- for (f = exp->ex_flavors; f < end; f++) {
- if (first || f->flags != lastflags) {
- if (!first)
- show_secinfo_flags(m, lastflags);
- seq_printf(m, ",sec=%d", f->pseudoflavor);
- lastflags = f->flags;
- } else {
- seq_printf(m, ":%d", f->pseudoflavor);
- }
+ f = exp->ex_flavors;
+ /* first run: show_secinfo_run() advances f past the flavors it printed */
+ flags = show_secinfo_run(m, &f, end);
+ if (!secinfo_flags_equal(flags, exp->ex_flags))
+ show_secinfo_flags(m, flags);
+ /* remaining runs: always print the run's flags */
+ while (f != end) {
+ flags = show_secinfo_run(m, &f, end);
+ show_secinfo_flags(m, flags);
}
- show_secinfo_flags(m, lastflags);
}
static void exp_flags(struct seq_file *m, int flag, int fsid,
@@ -1532,6 +1561,7 @@ const struct seq_operations nfs_exports_op = {
.show = e_show,
};
+#ifdef CONFIG_NFSD_DEPRECATED
/*
* Add or modify a client.
* Change requests may involve the list of host addresses. The list of
@@ -1563,7 +1593,7 @@ exp_addclient(struct nfsctl_client *ncp)
/* Insert client into hashtable. */
for (i = 0; i < ncp->cl_naddr; i++) {
ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
- auth_unix_add_addr(&addr6, dom);
+ auth_unix_add_addr(&init_net, &addr6, dom);
}
auth_unix_forget_old(dom);
auth_domain_put(dom);
@@ -1621,6 +1651,7 @@ exp_verify_string(char *cp, int max)
printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp);
return 0;
}
+#endif /* CONFIG_NFSD_DEPRECATED */
/*
* Initialize the exports module.
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 988cbb3a19b6..143da2eecd7b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -41,7 +41,6 @@
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
-#define NFS4_STATEID_SIZE 16
/* Index of predefined Linux callback client operations */
@@ -248,10 +247,11 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
}
static void
-encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
+encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
struct nfs4_cb_compound_hdr *hdr)
{
__be32 *p;
+ struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
if (hdr->minorversion == 0)
return;
@@ -259,8 +259,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
WRITE32(OP_CB_SEQUENCE);
- WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN);
- WRITE32(args->cbs_clp->cl_cb_seq_nr);
+ WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ WRITE32(ses->se_cb_seq_nr);
WRITE32(0); /* slotid, always 0 */
WRITE32(0); /* highest slotid always 0 */
WRITE32(0); /* cachethis always 0 */
@@ -280,18 +280,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
static int
nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
- struct nfs4_rpc_args *rpc_args)
+ struct nfsd4_callback *cb)
{
struct xdr_stream xdr;
- struct nfs4_delegation *args = rpc_args->args_op;
+ struct nfs4_delegation *args = cb->cb_op;
struct nfs4_cb_compound_hdr hdr = {
- .ident = args->dl_ident,
- .minorversion = rpc_args->args_seq.cbs_minorversion,
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_minorversion,
};
xdr_init_encode(&xdr, &req->rq_snd_buf, p);
encode_cb_compound_hdr(&xdr, &hdr);
- encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
+ encode_cb_sequence(&xdr, cb, &hdr);
encode_cb_recall(&xdr, args, &hdr);
encode_cb_nops(&hdr);
return 0;
@@ -339,15 +339,16 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
* with a single slot.
*/
static int
-decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
+decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
struct rpc_rqst *rqstp)
{
+ struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
struct nfs4_sessionid id;
int status;
u32 dummy;
__be32 *p;
- if (res->cbs_minorversion == 0)
+ if (cb->cb_minorversion == 0)
return 0;
status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
@@ -363,13 +364,12 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
- if (memcmp(id.data, res->cbs_clp->cl_sessionid.data,
- NFS4_MAX_SESSIONID_LEN)) {
+ if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
dprintk("%s Invalid session id\n", __func__);
goto out;
}
READ32(dummy);
- if (dummy != res->cbs_clp->cl_cb_seq_nr) {
+ if (dummy != ses->se_cb_seq_nr) {
dprintk("%s Invalid sequence number\n", __func__);
goto out;
}
@@ -393,7 +393,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
static int
nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
- struct nfsd4_cb_sequence *seq)
+ struct nfsd4_callback *cb)
{
struct xdr_stream xdr;
struct nfs4_cb_compound_hdr hdr;
@@ -403,8 +403,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
status = decode_cb_compound_hdr(&xdr, &hdr);
if (status)
goto out;
- if (seq) {
- status = decode_cb_sequence(&xdr, seq, rqstp);
+ if (cb) {
+ status = decode_cb_sequence(&xdr, cb, rqstp);
if (status)
goto out;
}
@@ -473,30 +473,34 @@ static int max_cb_time(void)
/* Reference counting, callback cleanup, etc., all look racy as heck.
* And why is cl_cb_set an atomic? */
-int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
+int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
{
struct rpc_timeout timeparms = {
.to_initval = max_cb_time(),
.to_retries = 0,
};
struct rpc_create_args args = {
- .protocol = XPRT_TRANSPORT_TCP,
- .address = (struct sockaddr *) &cb->cb_addr,
- .addrsize = cb->cb_addrlen,
+ .net = &init_net,
+ .address = (struct sockaddr *) &conn->cb_addr,
+ .addrsize = conn->cb_addrlen,
.timeout = &timeparms,
.program = &cb_program,
- .prognumber = cb->cb_prog,
.version = 0,
.authflavor = clp->cl_flavor,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
- .client_name = clp->cl_principal,
};
struct rpc_clnt *client;
- if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
- return -EINVAL;
- if (cb->cb_minorversion) {
- args.bc_xprt = cb->cb_xprt;
+ if (clp->cl_minorversion == 0) {
+ if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+ return -EINVAL;
+ args.client_name = clp->cl_principal;
+ args.prognumber = conn->cb_prog,
+ args.protocol = XPRT_TRANSPORT_TCP;
+ clp->cl_cb_ident = conn->cb_ident;
+ } else {
+ args.bc_xprt = conn->cb_xprt;
+ args.prognumber = clp->cl_cb_session->se_cb_prog;
args.protocol = XPRT_TRANSPORT_BC_TCP;
}
/* Create RPC client */
@@ -506,7 +510,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
PTR_ERR(client));
return PTR_ERR(client);
}
- nfsd4_set_callback_client(clp, client);
+ clp->cl_cb_client = client;
return 0;
}
@@ -519,7 +523,7 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
{
- struct nfs4_client *clp = calldata;
+ struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
if (task->tk_status)
warn_no_callback_path(clp, task->tk_status);
@@ -528,6 +532,8 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
}
+/*
+ * rpc_call_ops installed by do_probe_callback() for the CB_NULL probe;
+ * only a completion handler is provided.
+ */
static const struct rpc_call_ops nfsd4_cb_probe_ops = {
+ /* XXX: release method to ensure we set the cb channel down if
+ * necessary on early failure? */
.rpc_call_done = nfsd4_cb_probe_done,
};
@@ -543,38 +549,42 @@ int set_callback_cred(void)
return 0;
}
+static struct workqueue_struct *callback_wq;
-void do_probe_callback(struct nfs4_client *clp)
+/*
+ * Set up the client's embedded cl_cb_null callback as a CB_NULL request
+ * (no operation, no arguments, no result, nfsd4_cb_probe_ops) and queue
+ * its work item on callback_wq.  Nothing is sent from here; the queued
+ * work item does the rest.
+ */
+static void do_probe_callback(struct nfs4_client *clp)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
- .rpc_argp = clp,
- .rpc_cred = callback_cred
- };
- int status;
+ struct nfsd4_callback *cb = &clp->cl_cb_null;
- status = rpc_call_async(clp->cl_cb_client, &msg,
- RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
- &nfsd4_cb_probe_ops, (void *)clp);
- if (status)
- warn_no_callback_path(clp, status);
+ cb->cb_op = NULL;
+ cb->cb_clp = clp;
+
+ cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
+ cb->cb_msg.rpc_argp = NULL;
+ cb->cb_msg.rpc_resp = NULL;
+ cb->cb_msg.rpc_cred = callback_cred;
+
+ cb->cb_ops = &nfsd4_cb_probe_ops;
+
+ queue_work(callback_wq, &cb->cb_work);
}
/*
- * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
+ * Poke the callback thread to process any updates to the callback
+ * parameters, and send a null probe.
*/
-void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
+void nfsd4_probe_callback(struct nfs4_client *clp)
{
- int status;
+ /*
+ * Mark the callback parameters as changed; nfsd4_do_callback_rpc()
+ * calls nfsd4_process_cb_update() whenever cl_cb_flags is non-zero.
+ */
+ set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
+ do_probe_callback(clp);
+}
+/*
+ * Record new callback connection parameters for clp by copying *conn into
+ * clp->cl_cb_conn under cl_lock.  No rpc client is created here;
+ * nfsd4_process_cb_update() later copies cl_cb_conn out under the same
+ * lock and hands the copy to setup_callback_client().
+ */
+void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
+{
BUG_ON(atomic_read(&clp->cl_cb_set));
- status = setup_callback_client(clp, cb);
- if (status) {
- warn_no_callback_path(clp, status);
- return;
- }
- do_probe_callback(clp);
+ spin_lock(&clp->cl_lock);
+ memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
+ spin_unlock(&clp->cl_lock);
}
/*
@@ -585,8 +595,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
struct rpc_task *task)
{
- struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
- u32 *ptr = (u32 *)clp->cl_sessionid.data;
+ u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data;
int status = 0;
dprintk("%s: %u:%u:%u:%u\n", __func__,
@@ -598,14 +607,6 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
status = -EAGAIN;
goto out;
}
-
- /*
- * We'll need the clp during XDR encoding and decoding,
- * and the sequence during decoding to verify the reply
- */
- args->args_seq.cbs_clp = clp;
- task->tk_msg.rpc_resp = &args->args_seq;
-
out:
dprintk("%s status=%d\n", __func__, status);
return status;
@@ -617,13 +618,13 @@ out:
*/
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs4_delegation *dp = calldata;
+ struct nfsd4_callback *cb = calldata;
+ struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
struct nfs4_client *clp = dp->dl_client;
- struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
- u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+ u32 minorversion = clp->cl_minorversion;
int status = 0;
- args->args_seq.cbs_minorversion = minorversion;
+ cb->cb_minorversion = minorversion;
if (minorversion) {
status = nfsd41_cb_setup_sequence(clp, task);
if (status) {
@@ -640,19 +641,20 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
{
- struct nfs4_delegation *dp = calldata;
+ struct nfsd4_callback *cb = calldata;
+ struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
struct nfs4_client *clp = dp->dl_client;
dprintk("%s: minorversion=%d\n", __func__,
- clp->cl_cb_conn.cb_minorversion);
+ clp->cl_minorversion);
- if (clp->cl_cb_conn.cb_minorversion) {
+ if (clp->cl_minorversion) {
/* No need for lock, access serialized in nfsd4_cb_prepare */
- ++clp->cl_cb_seq_nr;
+ ++clp->cl_cb_session->se_cb_seq_nr;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
- clp->cl_cb_seq_nr);
+ clp->cl_cb_session->se_cb_seq_nr);
/* We're done looking into the sequence information */
task->tk_msg.rpc_resp = NULL;
@@ -662,7 +664,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
{
- struct nfs4_delegation *dp = calldata;
+ struct nfsd4_callback *cb = calldata;
+ struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
struct nfs4_client *clp = dp->dl_client;
struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
@@ -707,7 +710,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
static void nfsd4_cb_recall_release(void *calldata)
{
- struct nfs4_delegation *dp = calldata;
+ /* calldata is the nfsd4_callback embedded in the delegation as dl_recall */
+ struct nfsd4_callback *cb = calldata;
+ struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
nfs4_put_delegation(dp);
}
@@ -718,8 +722,6 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
.rpc_release = nfsd4_cb_recall_release,
};
-static struct workqueue_struct *callback_wq;
-
int nfsd4_create_callback_queue(void)
{
callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
@@ -734,57 +736,88 @@ void nfsd4_destroy_callback_queue(void)
}
/* must be called under the state lock */
-void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
+void nfsd4_shutdown_callback(struct nfs4_client *clp)
{
- struct rpc_clnt *old = clp->cl_cb_client;
-
- clp->cl_cb_client = new;
+ /* nfsd4_process_cb_update() tests this bit and stops after destroying the rpc client */
+ set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags);
/*
- * After this, any work that saw the old value of cl_cb_client will
- * be gone:
+ * Note this won't actually result in a null callback;
+ * instead, nfsd4_do_callback_rpc() will detect the killed
+ * client, destroy the rpc client, and stop:
*/
+ do_probe_callback(clp);
flush_workqueue(callback_wq);
- /* So we can safely shut it down: */
- if (old)
- rpc_shutdown_client(old);
}
-/*
- * called with dp->dl_count inc'ed.
- */
-static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
+/*
+ * Invoke the callback's rpc_release method, if its ops provide one, passing
+ * the callback itself as calldata.  nfsd4_do_callback_rpc() uses this when
+ * there is no rpc client to send the callback on.
+ */
+void nfsd4_release_cb(struct nfsd4_callback *cb)
{
- struct nfs4_client *clp = dp->dl_client;
- struct rpc_clnt *clnt = clp->cl_cb_client;
- struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
- .rpc_cred = callback_cred
- };
+ if (cb->cb_ops->rpc_release)
+ cb->cb_ops->rpc_release(cb);
+}
- if (clnt == NULL) {
- nfs4_put_delegation(dp);
- return; /* Client is shutting down; give up. */
+/*
+ * Apply a pending change to the client's callback channel.  Any existing
+ * rpc client is shut down first.  If the client is being killed
+ * (NFSD4_CLIENT_KILL set) nothing more is done.  Otherwise the CB_UPDATE
+ * flag is cleared, cl_cb_conn is copied out under cl_lock, and a new rpc
+ * client is built from that copy; a setup failure is only reported through
+ * warn_no_callback_path().
+ */
+void nfsd4_process_cb_update(struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_conn conn;
+ struct nfs4_client *clp = cb->cb_clp;
+ int err;
+
+ /*
+ * This is either an update, or the client dying; in either case,
+ * kill the old client:
+ */
+ if (clp->cl_cb_client) {
+ rpc_shutdown_client(clp->cl_cb_client);
+ clp->cl_cb_client = NULL;
}
+ if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags))
+ return;
+ spin_lock(&clp->cl_lock);
+ /*
+ * Only serialized callback code is allowed to clear these
+ * flags; main nfsd code can only set them:
+ */
+ BUG_ON(!clp->cl_cb_flags);
+ clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
+ /* copy the parameters out so setup_callback_client() runs without cl_lock held */
+ memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
+ spin_unlock(&clp->cl_lock);
- args->args_op = dp;
- msg.rpc_argp = args;
- dp->dl_retries = 1;
- rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp);
+ err = setup_callback_client(clp, &conn);
+ if (err)
+ warn_no_callback_path(clp, err);
}
+/*
+ * Work function for nfsd4_callback.cb_work.  Applies any pending
+ * callback-channel change first (non-zero cl_cb_flags), then either sends
+ * cb_msg asynchronously on the client's rpc client with the callback's own
+ * ops, or, when there is no rpc client, releases the callback and returns.
+ */
void nfsd4_do_callback_rpc(struct work_struct *w)
{
- /* XXX: for now, just send off delegation recall. */
- /* In future, generalize to handle any sort of callback. */
- struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
- struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
+ struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
+ struct nfs4_client *clp = cb->cb_clp;
+ struct rpc_clnt *clnt;
- _nfsd4_cb_recall(dp);
-}
+ if (clp->cl_cb_flags)
+ nfsd4_process_cb_update(cb);
+ /* read cl_cb_client only after the update above, which may replace or clear it */
+ clnt = clp->cl_cb_client;
+ if (!clnt) {
+ /* Callback channel broken, or client killed; give up: */
+ nfsd4_release_cb(cb);
+ return;
+ }
+ rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ cb->cb_ops, cb);
+}
+/*
+ * Queue a CB_RECALL for delegation dp.  The delegation's embedded
+ * dl_recall callback is filled in (the callback itself serves as both rpc
+ * argument and result, dp is the operation payload) and its work item is
+ * queued on callback_wq; nfsd4_do_callback_rpc() sends it.
+ */
void nfsd4_cb_recall(struct nfs4_delegation *dp)
{
+ struct nfsd4_callback *cb = &dp->dl_recall;
+
+ dp->dl_retries = 1;
+ cb->cb_op = dp;
+ cb->cb_clp = dp->dl_client;
+ cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
+ cb->cb_msg.rpc_argp = cb;
+ cb->cb_msg.rpc_resp = cb;
+ cb->cb_msg.rpc_cred = callback_cred;
+
+ cb->cb_ops = &nfsd4_cb_recall_ops;
+
queue_work(callback_wq, &dp->dl_recall.cb_work);
}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index c78dbf493424..f0695e815f0e 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -482,109 +482,26 @@ nfsd_idmap_shutdown(void)
cache_unregister(&nametoid_cache);
}
-/*
- * Deferred request handling
- */
-
-struct idmap_defer_req {
- struct cache_req req;
- struct cache_deferred_req deferred_req;
- wait_queue_head_t waitq;
- atomic_t count;
-};
-
-static inline void
-put_mdr(struct idmap_defer_req *mdr)
-{
- if (atomic_dec_and_test(&mdr->count))
- kfree(mdr);
-}
-
-static inline void
-get_mdr(struct idmap_defer_req *mdr)
-{
- atomic_inc(&mdr->count);
-}
-
-static void
-idmap_revisit(struct cache_deferred_req *dreq, int toomany)
-{
- struct idmap_defer_req *mdr =
- container_of(dreq, struct idmap_defer_req, deferred_req);
-
- wake_up(&mdr->waitq);
- put_mdr(mdr);
-}
-
-static struct cache_deferred_req *
-idmap_defer(struct cache_req *req)
-{
- struct idmap_defer_req *mdr =
- container_of(req, struct idmap_defer_req, req);
-
- mdr->deferred_req.revisit = idmap_revisit;
- get_mdr(mdr);
- return (&mdr->deferred_req);
-}
-
-static inline int
-do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key,
- struct cache_detail *detail, struct ent **item,
- struct idmap_defer_req *mdr)
-{
- *item = lookup_fn(key);
- if (!*item)
- return -ENOMEM;
- return cache_check(detail, &(*item)->h, &mdr->req);
-}
-
-static inline int
-do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
- struct ent *key, struct cache_detail *detail,
- struct ent **item)
-{
- int ret = -ENOMEM;
-
- *item = lookup_fn(key);
- if (!*item)
- goto out_err;
- ret = -ETIMEDOUT;
- if (!test_bit(CACHE_VALID, &(*item)->h.flags)
- || (*item)->h.expiry_time < get_seconds()
- || detail->flush_time > (*item)->h.last_refresh)
- goto out_put;
- ret = -ENOENT;
- if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
- goto out_put;
- return 0;
-out_put:
- cache_put(&(*item)->h, detail);
-out_err:
- *item = NULL;
- return ret;
-}
-
+/*
+ * Look up key with lookup_fn and validate the result with cache_check(),
+ * using the request's rq_chandle.  On -ETIMEDOUT the key is looked up
+ * again: a different entry is re-checked, while the same entry has the
+ * reference taken by the second lookup dropped before the error is
+ * returned.
+ *
+ * Returns -ENOMEM when a lookup yields no entry, otherwise the
+ * cache_check() result.  *item is only meaningful when 0 is returned.
+ */
static int
idmap_lookup(struct svc_rqst *rqstp,
struct ent *(*lookup_fn)(struct ent *), struct ent *key,
struct cache_detail *detail, struct ent **item)
{
- struct idmap_defer_req *mdr;
int ret;
- mdr = kzalloc(sizeof(*mdr), GFP_KERNEL);
- if (!mdr)
+ *item = lookup_fn(key);
+ if (!*item)
return -ENOMEM;
- atomic_set(&mdr->count, 1);
- init_waitqueue_head(&mdr->waitq);
- mdr->req.defer = idmap_defer;
- ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr);
- if (ret == -EAGAIN) {
- wait_event_interruptible_timeout(mdr->waitq,
- test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ);
- ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item);
+ retry:
+ ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle);
+
+ if (ret == -ETIMEDOUT) {
+ struct ent *prev_item = *item;
+ *item = lookup_fn(key);
+ /*
+ * The lookup can fail just like the first one above; without
+ * this check a NULL result would differ from prev_item and be
+ * dereferenced at the retry label.
+ */
+ if (!*item)
+ return -ENOMEM;
+ if (*item != prev_item)
+ goto retry;
+ cache_put(&(*item)->h, detail);
}
- put_mdr(mdr);
return ret;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 59ec449b0c7f..0cdfd022bb7b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1031,8 +1031,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
resp->cstate.session = NULL;
fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
- /* Use the deferral mechanism only for NFSv4.0 compounds */
- rqstp->rq_usedeferral = (args->minorversion == 0);
+ /*
+ * Don't use the deferral mechanism for NFSv4; compounds make it
+ * too hard to avoid non-idempotency problems.
+ */
+ rqstp->rq_usedeferral = 0;
/*
* According to RFC3010, this takes precedence over all other errors.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7292fcf7718..9019e8ec9dc8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -207,7 +207,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
{
struct nfs4_delegation *dp;
struct nfs4_file *fp = stp->st_file;
- struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
dprintk("NFSD alloc_init_deleg\n");
/*
@@ -234,7 +233,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
nfs4_file_get_access(fp, O_RDONLY);
dp->dl_flock = NULL;
dp->dl_type = type;
- dp->dl_ident = cb->cb_ident;
dp->dl_stateid.si_boot = boot_time;
dp->dl_stateid.si_stateownerid = current_delegid++;
dp->dl_stateid.si_fileid = 0;
@@ -535,171 +533,258 @@ gen_sessionid(struct nfsd4_session *ses)
*/
#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
+/* Free every slot of the session's fore channel; the pointer table itself is left alone. */
+static void free_session_slots(struct nfsd4_session *ses)
+{
+	int idx = 0;
+
+	while (idx < ses->se_fchannel.maxreqs) {
+		kfree(ses->se_slots[idx]);
+		idx++;
+	}
+}
+
/*
- * Give the client the number of ca_maxresponsesize_cached slots it
- * requests, of size bounded by NFSD_SLOT_CACHE_SIZE,
- * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more
- * than NFSD_MAX_SLOTS_PER_SESSION.
- *
- * If we run out of reserved DRC memory we should (up to a point)
+ * We don't actually need to cache the rpc and session headers, so we
+ * can allocate a little less for each slot:
+ */
+static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
+{
+	/* cached reply size minus the rpc/sequence header allowance */
+	int bytes = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
+
+	return bytes;
+}
+
+/*
+ * Turn a requested cached-response size into the per-slot cache size:
+ * subtract the header allowance and cap the result at NFSD_SLOT_CACHE_SIZE.
+ *
+ * NOTE(review): size is a u32, so a request smaller than
+ * NFSD_MIN_HDR_SEQ_SZ wraps around in the subtraction and is then capped
+ * to NFSD_SLOT_CACHE_SIZE instead of becoming 0 (the removed
+ * set_forechannel_drc_size() clamped first).  A request exactly equal to
+ * NFSD_MIN_HDR_SEQ_SZ yields 0.  Confirm that callers cope with both.
+ */
+static int nfsd4_sanitize_slot_size(u32 size)
+{
+ size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */
+ size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE);
+
+ return size;
+}
+
+/*
+ * XXX: If we run out of reserved DRC memory we could (up to a point)
* re-negotiate active sessions and reduce their slot usage to make
* rooom for new connections. For now we just fail the create session.
*/
-static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
+/*
+ * Reserve DRC memory for up to num slots of slotsize bytes each and return
+ * the number of slots actually granted.  The count is capped at
+ * NFSD_MAX_SLOTS_PER_SESSION and by what fits into the smaller of
+ * NFSD_MAX_MEM_PER_SESSION and the memory still unreserved; the granted
+ * amount is added to nfsd_drc_mem_used under nfsd_drc_lock.  May return 0.
+ */
+static int nfsd4_get_drc_mem(int slotsize, u32 num)
{
- int mem, size = fchan->maxresp_cached;
+ int avail;
- if (fchan->maxreqs < 1)
- return nfserr_inval;
+ num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
- if (size < NFSD_MIN_HDR_SEQ_SZ)
- size = NFSD_MIN_HDR_SEQ_SZ;
- size -= NFSD_MIN_HDR_SEQ_SZ;
- if (size > NFSD_SLOT_CACHE_SIZE)
- size = NFSD_SLOT_CACHE_SIZE;
-
- /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
- mem = fchan->maxreqs * size;
- if (mem > NFSD_MAX_MEM_PER_SESSION) {
- fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
- if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
- fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
- mem = fchan->maxreqs * size;
- }
+ spin_lock(&nfsd_drc_lock);
+ avail = min_t(int, NFSD_MAX_MEM_PER_SESSION,
+ nfsd_drc_max_mem - nfsd_drc_mem_used);
+ /*
+ * A slot size of 0 needs no DRC memory at all, so there is nothing
+ * to bound by and, more importantly, nothing to divide by.
+ */
+ if (slotsize > 0)
+ num = min_t(int, num, avail / slotsize);
+ nfsd_drc_mem_used += num * slotsize;
+ spin_unlock(&nfsd_drc_lock);
+ return num;
+}
+
+/*
+ * Give back DRC memory for num slots of slotsize bytes each by subtracting
+ * it from nfsd_drc_mem_used under nfsd_drc_lock.
+ */
+static void nfsd4_put_drc_mem(int slotsize, int num)
+{
spin_lock(&nfsd_drc_lock);
- /* bound the total session drc memory ussage */
- if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
- fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
- mem = fchan->maxreqs * size;
- }
- nfsd_drc_mem_used += mem;
+ nfsd_drc_mem_used -= slotsize * num;
spin_unlock(&nfsd_drc_lock);
+}
- if (fchan->maxreqs == 0)
- return nfserr_jukebox;
+/*
+ * Allocate a zeroed session with a trailing table of numslots slot
+ * pointers, and one zeroed nfsd4_slot with slotsize extra bytes per table
+ * entry.  Returns NULL if any allocation fails, after freeing whatever was
+ * already allocated.
+ */
+static struct nfsd4_session *alloc_session(int slotsize, int numslots)
+{
+ struct nfsd4_session *new;
+ int mem, i;
- fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ;
- return 0;
+ BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
+ + sizeof(struct nfsd4_session) > PAGE_SIZE);
+ mem = numslots * sizeof(struct nfsd4_slot *);
+
+ new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ /* allocate each struct nfsd4_slot and data cache in one piece */
+ for (i = 0; i < numslots; i++) {
+ mem = sizeof(struct nfsd4_slot) + slotsize;
+ new->se_slots[i] = kzalloc(mem, GFP_KERNEL);
+ if (!new->se_slots[i])
+ goto out_free;
+ }
+ return new;
+out_free:
+ /* i is the index that failed; free the slots below it */
+ while (i--)
+ kfree(new->se_slots[i]);
+ kfree(new);
+ return NULL;
}
-/*
- * fchan holds the client values on input, and the server values on output
- * sv_max_mesg is the maximum payload plus one page for overhead.
- */
-static int init_forechannel_attrs(struct svc_rqst *rqstp,
- struct nfsd4_channel_attrs *session_fchan,
- struct nfsd4_channel_attrs *fchan)
+/*
+ * Fill in the session's fore channel attributes (new) from the client's
+ * request (req) and the slot count and slot size already decided on.
+ * maxresp_cached gets the header allowance added back to slotsize; request
+ * and response sizes are capped at nfsd_serv->sv_max_mesg and maxops at
+ * NFSD_MAX_OPS_PER_COMPOUND.
+ */
+static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize)
{
- int status = 0;
- __u32 maxcount = nfsd_serv->sv_max_mesg;
+ u32 maxrpc = nfsd_serv->sv_max_mesg;
- /* headerpadsz set to zero in encode routine */
+ new->maxreqs = numslots;
+ new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ;
+ new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
+ new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
+ new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
+}
- /* Use the client's max request and max response size if possible */
- if (fchan->maxreq_sz > maxcount)
- fchan->maxreq_sz = maxcount;
- session_fchan->maxreq_sz = fchan->maxreq_sz;
+/* Drop the transport reference taken in alloc_conn() and free the conn. */
+static void free_conn(struct nfsd4_conn *c)
+{
+ svc_xprt_put(c->cn_xprt);
+ kfree(c);
+}
- if (fchan->maxresp_sz > maxcount)
- fchan->maxresp_sz = maxcount;
- session_fchan->maxresp_sz = fchan->maxresp_sz;
+/*
+ * svc_xpt_user callback installed by nfsd4_register_conn().  Unlinks the
+ * conn from its session and frees it, unless it has already been taken off
+ * the session's list: nfsd4_del_conns() uses list_del_init(), which leaves
+ * cn_persession empty, and frees the conn itself.
+ */
+static void nfsd4_conn_lost(struct svc_xpt_user *u)
+{
+ struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
+ struct nfs4_client *clp = c->cn_session->se_client;
- /* Use the client's maxops if possible */
- if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
- fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
- session_fchan->maxops = fchan->maxops;
+ spin_lock(&clp->cl_lock);
+ if (!list_empty(&c->cn_persession)) {
+ list_del(&c->cn_persession);
+ free_conn(c);
+ }
+ spin_unlock(&clp->cl_lock);
+}
- /* FIXME: Error means no more DRC pages so the server should
- * recover pages from existing sessions. For now fail session
- * creation.
- */
- status = set_forechannel_drc_size(fchan);
+/*
+ * Allocate a conn for the request's transport, taking a reference on that
+ * transport, and record flags.  cn_session and cn_persession are not set
+ * here (the memory comes from kmalloc); __nfsd4_hash_conn() fills them in.
+ * Returns NULL on allocation failure.
+ */
+static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
+{
+ struct nfsd4_conn *conn;
- session_fchan->maxresp_cached = fchan->maxresp_cached;
- session_fchan->maxreqs = fchan->maxreqs;
+ conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
+ if (!conn)
+ return NULL;
+ svc_xprt_get(rqstp->rq_xprt);
+ conn->cn_xprt = rqstp->rq_xprt;
+ conn->cn_flags = flags;
+ INIT_LIST_HEAD(&conn->cn_xpt_user.list);
+ return conn;
+}
- dprintk("%s status %d\n", __func__, status);
- return status;
+/*
+ * Attach conn to ses and add it to the session's se_conns list.  Takes no
+ * lock itself; nfsd4_hash_conn() is the wrapper that holds cl_lock.
+ */
+static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
+{
+ conn->cn_session = ses;
+ list_add(&conn->cn_persession, &ses->se_conns);
}
-static void
-free_session_slots(struct nfsd4_session *ses)
+/* Add conn to ses under the owning client's cl_lock. */
+static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
{
- int i;
+ struct nfs4_client *clp = ses->se_client;
- for (i = 0; i < ses->se_fchannel.maxreqs; i++)
- kfree(ses->se_slots[i]);
+ spin_lock(&clp->cl_lock);
+ __nfsd4_hash_conn(conn, ses);
+ spin_unlock(&clp->cl_lock);
}
-/*
- * We don't actually need to cache the rpc and session headers, so we
- * can allocate a little less for each slot:
- */
-static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
+/*
+ * Install nfsd4_conn_lost() as the callback of the conn's svc_xpt_user and
+ * register that user on the conn's transport.
+ * NOTE(review): presumably the transport invokes the callback when the
+ * connection goes away; confirm against register_xpt_user().
+ */
+static void nfsd4_register_conn(struct nfsd4_conn *conn)
{
- return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
+ conn->cn_xpt_user.callback = nfsd4_conn_lost;
+ register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
}
-static int
-alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
- struct nfsd4_create_session *cses)
+/*
+ * Create a conn for the request's transport, add it to ses and register it
+ * on the transport.  The conn is always flagged as a fore channel, and
+ * additionally as a back channel when the session has SESSION4_BACK_CHAN
+ * set.  Returns nfserr_jukebox if the conn cannot be allocated, nfs_ok
+ * otherwise.
+ */
+static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
{
- struct nfsd4_session *new, tmp;
- struct nfsd4_slot *sp;
- int idx, slotsize, cachesize, i;
- int status;
+ struct nfsd4_conn *conn;
+ u32 flags = NFS4_CDFC4_FORE;
- memset(&tmp, 0, sizeof(tmp));
+ if (ses->se_flags & SESSION4_BACK_CHAN)
+ flags |= NFS4_CDFC4_BACK;
+ conn = alloc_conn(rqstp, flags);
+ if (!conn)
+ return nfserr_jukebox;
+ /* hash first: nfsd4_conn_lost() reads cn_session and cn_persession */
+ nfsd4_hash_conn(conn, ses);
+ nfsd4_register_conn(conn);
+ return nfs_ok;
+}
- /* FIXME: For now, we just accept the client back channel attributes. */
- tmp.se_bchannel = cses->back_channel;
- status = init_forechannel_attrs(rqstp, &tmp.se_fchannel,
- &cses->fore_channel);
- if (status)
- goto out;
+/*
+ * Remove and free every conn of session s.  Each conn is unlinked with
+ * list_del_init() while cl_lock is held, so that nfsd4_conn_lost(), which
+ * tests list_empty() under the same lock, leaves it alone; the lock is
+ * then dropped around unregister_xpt_user() and free_conn() and retaken
+ * before the list is examined again.
+ */
+static void nfsd4_del_conns(struct nfsd4_session *s)
+{
+ struct nfs4_client *clp = s->se_client;
+ struct nfsd4_conn *c;
- BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot)
- + sizeof(struct nfsd4_session) > PAGE_SIZE);
+ spin_lock(&clp->cl_lock);
+ while (!list_empty(&s->se_conns)) {
+ c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
+ list_del_init(&c->cn_persession);
+ spin_unlock(&clp->cl_lock);
- status = nfserr_jukebox;
- /* allocate struct nfsd4_session and slot table pointers in one piece */
- slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
- new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
- if (!new)
- goto out;
+ unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
+ free_conn(c);
- memcpy(new, &tmp, sizeof(*new));
+ spin_lock(&clp->cl_lock);
+ }
+ spin_unlock(&clp->cl_lock);
+}
- /* allocate each struct nfsd4_slot and data cache in one piece */
- cachesize = slot_bytes(&new->se_fchannel);
- for (i = 0; i < new->se_fchannel.maxreqs; i++) {
- sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
- if (!sp)
- goto out_free;
- new->se_slots[i] = sp;
+/*
+ * kref release function for a session: tear down its connections, return
+ * its slot cache memory to the DRC accounting, then free the slots and the
+ * session itself.
+ */
+void free_session(struct kref *kref)
+{
+	struct nfsd4_session *ses = container_of(kref, struct nfsd4_session, se_ref);
+	int drc_bytes;
+
+	nfsd4_del_conns(ses);
+
+	spin_lock(&nfsd_drc_lock);
+	drc_bytes = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
+	nfsd_drc_mem_used -= drc_bytes;
+	spin_unlock(&nfsd_drc_lock);
+
+	free_session_slots(ses);
+	kfree(ses);
+}
+
+/*
+ * Create a session for clp from the CREATE_SESSION arguments in cses:
+ * size and reserve the slot cache, allocate the session, hash it by
+ * session id and on the client's session list, and attach the request's
+ * connection.  If the client has no callback session yet and the new
+ * session asked for a back channel, the session also becomes the client's
+ * callback session and a callback probe is started.
+ *
+ * Returns the new session, or NULL on failure, in which case nothing stays
+ * reserved or hashed.
+ */
+static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
+{
+ struct nfsd4_session *new;
+ struct nfsd4_channel_attrs *fchan = &cses->fore_channel;
+ int numslots, slotsize;
+ int status;
+ int idx;
+
+ /*
+ * Note decreasing slot size below client's request may
+ * make it difficult for client to function correctly, whereas
+ * decreasing the number of slots will (just?) affect
+ * performance. When short on memory we therefore prefer to
+ * decrease number of slots instead of their size.
+ */
+ slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached);
+ numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs);
+
+ new = alloc_session(slotsize, numslots);
+ if (!new) {
+ /*
+ * Give back exactly what nfsd4_get_drc_mem() reserved: that is
+ * numslots slots, which may be fewer than the client asked for.
+ */
+ nfsd4_put_drc_mem(slotsize, numslots);
+ return NULL;
}
+ init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize);
new->se_client = clp;
gen_sessionid(new);
- idx = hash_sessionid(&new->se_sessionid);
- memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
- NFS4_MAX_SESSIONID_LEN);
+ INIT_LIST_HEAD(&new->se_conns);
+
+ new->se_cb_seq_nr = 1;
new->se_flags = cses->flags;
+ new->se_cb_prog = cses->callback_prog;
kref_init(&new->se_ref);
+ idx = hash_sessionid(&new->se_sessionid);
spin_lock(&client_lock);
list_add(&new->se_hash, &sessionid_hashtbl[idx]);
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&client_lock);
- status = nfs_ok;
-out:
- return status;
-out_free:
- free_session_slots(new);
- kfree(new);
- goto out;
+ status = nfsd4_new_conn(rqstp, new);
+ /* whoops: benny points out, status is ignored! (err, or bogus) */
+ if (status) {
+ /*
+ * The session was hashed above; unlink it from both lists
+ * before freeing it so that no stale pointer is left behind.
+ */
+ spin_lock(&client_lock);
+ list_del(&new->se_hash);
+ list_del(&new->se_perclnt);
+ spin_unlock(&client_lock);
+ free_session(&new->se_ref);
+ return NULL;
+ }
+ if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) {
+ struct sockaddr *sa = svc_addr(rqstp);
+
+ clp->cl_cb_session = new;
+ clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+ svc_xprt_get(rqstp->rq_xprt);
+ rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
+ clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+ nfsd4_probe_callback(clp);
+ }
+ return new;
}
/* caller must hold client_lock */
@@ -731,21 +816,6 @@ unhash_session(struct nfsd4_session *ses)
list_del(&ses->se_perclnt);
}
-void
-free_session(struct kref *kref)
-{
- struct nfsd4_session *ses;
- int mem;
-
- ses = container_of(kref, struct nfsd4_session, se_ref);
- spin_lock(&nfsd_drc_lock);
- mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
- nfsd_drc_mem_used -= mem;
- spin_unlock(&nfsd_drc_lock);
- free_session_slots(ses);
- kfree(ses);
-}
-
/* must be called under the client_lock */
static inline void
renew_client_locked(struct nfs4_client *clp)
@@ -812,6 +882,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
static inline void
free_client(struct nfs4_client *clp)
{
+ while (!list_empty(&clp->cl_sessions)) {
+ struct nfsd4_session *ses;
+ ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
+ se_perclnt);
+ list_del(&ses->se_perclnt);
+ nfsd4_put_session(ses);
+ }
if (clp->cl_cred.cr_group_info)
put_group_info(clp->cl_cred.cr_group_info);
kfree(clp->cl_principal);
@@ -838,15 +915,12 @@ release_session_client(struct nfsd4_session *session)
static inline void
unhash_client_locked(struct nfs4_client *clp)
{
+ struct nfsd4_session *ses;
+
mark_client_expired(clp);
list_del(&clp->cl_lru);
- while (!list_empty(&clp->cl_sessions)) {
- struct nfsd4_session *ses;
- ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
- se_perclnt);
- unhash_session(ses);
- nfsd4_put_session(ses);
- }
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ list_del_init(&ses->se_hash);
}
static void
@@ -875,7 +949,7 @@ expire_client(struct nfs4_client *clp)
sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
release_openowner(sop);
}
- nfsd4_set_callback_client(clp, NULL);
+ nfsd4_shutdown_callback(clp);
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
list_del(&clp->cl_idhash);
@@ -960,6 +1034,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
if (clp == NULL)
return NULL;
+ INIT_LIST_HEAD(&clp->cl_sessions);
+
princ = svc_gss_principal(rqstp);
if (princ) {
clp->cl_principal = kstrdup(princ, GFP_KERNEL);
@@ -976,8 +1052,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
INIT_LIST_HEAD(&clp->cl_strhash);
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
- INIT_LIST_HEAD(&clp->cl_sessions);
INIT_LIST_HEAD(&clp->cl_lru);
+ spin_lock_init(&clp->cl_lock);
+ INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
@@ -986,7 +1063,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
clp->cl_flavor = rqstp->rq_flavor;
copy_cred(&clp->cl_cred, &rqstp->rq_cred);
gen_confirm(clp);
-
+ clp->cl_cb_session = NULL;
return clp;
}
@@ -1098,7 +1175,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
static void
gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
{
- struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
+ struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
unsigned short expected_family;
/* Currently, we only support tcp and tcp6 for the callback channel */
@@ -1111,24 +1188,23 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
else
goto out_err;
- cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
+ conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
se->se_callback_addr_len,
- (struct sockaddr *) &cb->cb_addr,
- sizeof(cb->cb_addr));
+ (struct sockaddr *)&conn->cb_addr,
+ sizeof(conn->cb_addr));
- if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family)
+ if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
goto out_err;
- if (cb->cb_addr.ss_family == AF_INET6)
- ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid;
+ if (conn->cb_addr.ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
- cb->cb_minorversion = 0;
- cb->cb_prog = se->se_callback_prog;
- cb->cb_ident = se->se_callback_ident;
+ conn->cb_prog = se->se_callback_prog;
+ conn->cb_ident = se->se_callback_ident;
return;
out_err:
- cb->cb_addr.ss_family = AF_UNSPEC;
- cb->cb_addrlen = 0;
+ conn->cb_addr.ss_family = AF_UNSPEC;
+ conn->cb_addrlen = 0;
dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
"will not receive delegations\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -1415,7 +1491,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
{
struct sockaddr *sa = svc_addr(rqstp);
struct nfs4_client *conf, *unconf;
+ struct nfsd4_session *new;
struct nfsd4_clid_slot *cs_slot = NULL;
+ bool confirm_me = false;
int status = 0;
nfs4_lock_state();
@@ -1438,7 +1516,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
cs_slot->sl_seqid, cr_ses->seqid);
goto out;
}
- cs_slot->sl_seqid++;
} else if (unconf) {
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
!rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
@@ -1451,25 +1528,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
if (status) {
/* an unconfirmed replay returns misordered */
status = nfserr_seq_misordered;
- goto out_cache;
+ goto out;
}
- cs_slot->sl_seqid++; /* from 0 to 1 */
- move_to_confirmed(unconf);
-
- if (cr_ses->flags & SESSION4_BACK_CHAN) {
- unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
- svc_xprt_get(rqstp->rq_xprt);
- rpc_copy_addr(
- (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
- sa);
- unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
- unconf->cl_cb_conn.cb_minorversion =
- cstate->minorversion;
- unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
- unconf->cl_cb_seq_nr = 1;
- nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
- }
+ confirm_me = true;
conf = unconf;
} else {
status = nfserr_stale_clientid;
@@ -1477,22 +1539,30 @@ nfsd4_create_session(struct svc_rqst *rqstp,
}
/*
+ * XXX: we should probably set this at creation time, and check
+ * for consistent minorversion use throughout:
+ */
+ conf->cl_minorversion = 1;
+ /*
* We do not support RDMA or persistent sessions
*/
cr_ses->flags &= ~SESSION4_PERSIST;
cr_ses->flags &= ~SESSION4_RDMA;
- status = alloc_init_session(rqstp, conf, cr_ses);
- if (status)
+ status = nfserr_jukebox;
+ new = alloc_init_session(rqstp, conf, cr_ses);
+ if (!new)
goto out;
-
- memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
+ status = nfs_ok;
+ memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
+ cs_slot->sl_seqid++;
cr_ses->seqid = cs_slot->sl_seqid;
-out_cache:
/* cache solo and embedded create sessions under the state lock */
nfsd4_cache_create_session(cr_ses, cs_slot, status);
+ if (confirm_me)
+ move_to_confirmed(conf);
out:
nfs4_unlock_state();
dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1546,8 +1616,11 @@ nfsd4_destroy_session(struct svc_rqst *r,
nfs4_lock_state();
/* wait for callbacks */
- nfsd4_set_callback_client(ses->se_client, NULL);
+ nfsd4_shutdown_callback(ses->se_client);
nfs4_unlock_state();
+
+ nfsd4_del_conns(ses);
+
nfsd4_put_session(ses);
status = nfs_ok;
out:
@@ -1555,6 +1628,36 @@ out:
return status;
}
+/*
+ * Search session s's connection list (se_conns) for the entry whose
+ * transport is xpt.  Returns the matching nfsd4_conn, or NULL if that
+ * transport is not on the list.
+ *
+ * Takes no lock itself; the caller in nfsd4_sequence_check_conn() holds
+ * the client's cl_lock across the call.
+ */
+static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
+{
+ struct nfsd4_conn *c;
+
+ list_for_each_entry(c, &s->se_conns, cn_persession) {
+ if (c->cn_xprt == xpt) {
+ return c;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Associate the connection "new" with session "ses" unless the session
+ * already has an entry for the same transport.
+ *
+ * Consumes "new" either way: it is released with free_conn() when a
+ * duplicate is found, otherwise it is handed to __nfsd4_hash_conn() and
+ * then nfsd4_register_conn().  The lookup and the hash step both run
+ * under the owning client's cl_lock; registration happens after the
+ * lock has been dropped.
+ */
+static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
+{
+ struct nfs4_client *clp = ses->se_client;
+ struct nfsd4_conn *c;
+
+ spin_lock(&clp->cl_lock);
+ c = __nfsd4_find_conn(new->cn_xprt, ses);
+ if (c) {
+ /* Transport already known to this session: drop the spare. */
+ spin_unlock(&clp->cl_lock);
+ free_conn(new);
+ return;
+ }
+ __nfsd4_hash_conn(new, ses);
+ spin_unlock(&clp->cl_lock);
+ nfsd4_register_conn(new);
+ return;
+}
+
__be32
nfsd4_sequence(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
@@ -1563,11 +1666,20 @@ nfsd4_sequence(struct svc_rqst *rqstp,
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct nfsd4_session *session;
struct nfsd4_slot *slot;
+ struct nfsd4_conn *conn;
int status;
if (resp->opcnt != 1)
return nfserr_sequence_pos;
+ /*
+ * Will be either used or freed by nfsd4_sequence_check_conn
+ * below.
+ */
+ conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
+ if (!conn)
+ return nfserr_jukebox;
+
spin_lock(&client_lock);
status = nfserr_badsession;
session = find_in_sessionid_hashtbl(&seq->sessionid);
@@ -1599,6 +1711,9 @@ nfsd4_sequence(struct svc_rqst *rqstp,
if (status)
goto out;
+ nfsd4_sequence_check_conn(conn, session);
+ conn = NULL;
+
/* Success! bump slot seqid */
slot->sl_inuse = true;
slot->sl_seqid = seq->seqid;
@@ -1613,6 +1728,7 @@ out:
nfsd4_get_session(cstate->session);
atomic_inc(&session->se_client->cl_refcount);
}
+ kfree(conn);
spin_unlock(&client_lock);
dprintk("%s: return %d\n", __func__, ntohl(status));
return status;
@@ -1747,6 +1863,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
gen_clid(new);
}
+ /*
+ * XXX: we should probably set this at creation time, and check
+ * for consistent minorversion use throughout:
+ */
+ new->cl_minorversion = 0;
gen_callback(new, setclid, rpc_get_scope_id(sa));
add_to_unconfirmed(new, strhashval);
setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
@@ -1807,7 +1928,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
status = nfserr_clid_inuse;
else {
atomic_set(&conf->cl_cb_set, 0);
- nfsd4_probe_callback(conf, &unconf->cl_cb_conn);
+ nfsd4_change_callback(conf, &unconf->cl_cb_conn);
+ nfsd4_probe_callback(conf);
expire_client(unconf);
status = nfs_ok;
@@ -1841,7 +1963,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
}
move_to_confirmed(unconf);
conf = unconf;
- nfsd4_probe_callback(conf, &conf->cl_cb_conn);
+ nfsd4_probe_callback(conf);
status = nfs_ok;
}
} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
@@ -2944,7 +3066,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
if (STALE_STATEID(stateid))
goto out;
- status = nfserr_bad_stateid;
+ /*
+ * We assume that any stateid that has the current boot time,
+ * but that we can't find, is expired:
+ */
+ status = nfserr_expired;
if (is_delegation_stateid(stateid)) {
dp = find_delegation_stateid(ino, stateid);
if (!dp)
@@ -2964,6 +3090,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
stp = find_stateid(stateid, flags);
if (!stp)
goto out;
+ status = nfserr_bad_stateid;
if (nfs4_check_fh(current_fh, stp))
goto out;
if (!stp->st_stateowner->so_confirmed)
@@ -3038,8 +3165,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
* a replayed close:
*/
sop = search_close_lru(stateid->si_stateownerid, flags);
+ /* It's not stale; let's assume it's expired: */
if (sop == NULL)
- return nfserr_bad_stateid;
+ return nfserr_expired;
*sopp = sop;
goto check_replay;
}
@@ -3304,6 +3432,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_bad_stateid;
if (!is_delegation_stateid(stateid))
goto out;
+ status = nfserr_expired;
dp = find_delegation_stateid(inode, stateid);
if (!dp)
goto out;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1a468bbd330f..f35a94a04026 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1805,19 +1805,23 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
goto out_nfserr;
}
}
- if ((buflen -= 16) < 0)
- goto out_resource;
- if (unlikely(bmval2)) {
+ if (bmval2) {
+ if ((buflen -= 16) < 0)
+ goto out_resource;
WRITE32(3);
WRITE32(bmval0);
WRITE32(bmval1);
WRITE32(bmval2);
- } else if (likely(bmval1)) {
+ } else if (bmval1) {
+ if ((buflen -= 12) < 0)
+ goto out_resource;
WRITE32(2);
WRITE32(bmval0);
WRITE32(bmval1);
} else {
+ if ((buflen -= 8) < 0)
+ goto out_resource;
WRITE32(1);
WRITE32(bmval0);
}
@@ -1828,15 +1832,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
u32 word1 = nfsd_suppattrs1(minorversion);
u32 word2 = nfsd_suppattrs2(minorversion);
- if ((buflen -= 12) < 0)
- goto out_resource;
if (!aclsupport)
word0 &= ~FATTR4_WORD0_ACL;
if (!word2) {
+ if ((buflen -= 12) < 0)
+ goto out_resource;
WRITE32(2);
WRITE32(word0);
WRITE32(word1);
} else {
+ if ((buflen -= 16) < 0)
+ goto out_resource;
WRITE32(3);
WRITE32(word0);
WRITE32(word1);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 06fa87e52e82..d6dc3f61f8ba 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -22,6 +22,7 @@
*/
enum {
NFSD_Root = 1,
+#ifdef CONFIG_NFSD_DEPRECATED
NFSD_Svc,
NFSD_Add,
NFSD_Del,
@@ -29,6 +30,7 @@ enum {
NFSD_Unexport,
NFSD_Getfd,
NFSD_Getfs,
+#endif
NFSD_List,
NFSD_Export_features,
NFSD_Fh,
@@ -54,6 +56,7 @@ enum {
/*
* write() for these nodes.
*/
+#ifdef CONFIG_NFSD_DEPRECATED
static ssize_t write_svc(struct file *file, char *buf, size_t size);
static ssize_t write_add(struct file *file, char *buf, size_t size);
static ssize_t write_del(struct file *file, char *buf, size_t size);
@@ -61,6 +64,7 @@ static ssize_t write_export(struct file *file, char *buf, size_t size);
static ssize_t write_unexport(struct file *file, char *buf, size_t size);
static ssize_t write_getfd(struct file *file, char *buf, size_t size);
static ssize_t write_getfs(struct file *file, char *buf, size_t size);
+#endif
static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size);
static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size);
@@ -76,6 +80,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
#endif
static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+#ifdef CONFIG_NFSD_DEPRECATED
[NFSD_Svc] = write_svc,
[NFSD_Add] = write_add,
[NFSD_Del] = write_del,
@@ -83,6 +88,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Unexport] = write_unexport,
[NFSD_Getfd] = write_getfd,
[NFSD_Getfs] = write_getfs,
+#endif
[NFSD_Fh] = write_filehandle,
[NFSD_FO_UnlockIP] = write_unlock_ip,
[NFSD_FO_UnlockFS] = write_unlock_fs,
@@ -121,6 +127,14 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
{
+ static int warned;
+ if (file->f_dentry->d_name.name[0] == '.' && !warned) {
+ printk(KERN_INFO
+ "Warning: \"%s\" uses deprecated NFSD interface: %s."
+ " This will be removed in 2.6.40\n",
+ current->comm, file->f_dentry->d_name.name);
+ warned = 1;
+ }
if (! file->private_data) {
/* An attempt to read a transaction file without writing
* causes a 0-byte write so that the file can return
@@ -187,6 +201,7 @@ static const struct file_operations pool_stats_operations = {
* payload - write methods
*/
+#ifdef CONFIG_NFSD_DEPRECATED
/**
* write_svc - Start kernel's NFSD server
*
@@ -402,7 +417,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
- clp = auth_unix_lookup(&in6);
+ clp = auth_unix_lookup(&init_net, &in6);
if (!clp)
err = -EPERM;
else {
@@ -465,7 +480,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
- clp = auth_unix_lookup(&in6);
+ clp = auth_unix_lookup(&init_net, &in6);
if (!clp)
err = -EPERM;
else {
@@ -482,6 +497,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
out:
return err;
}
+#endif /* CONFIG_NFSD_DEPRECATED */
/**
* write_unlock_ip - Release all locks used by a client
@@ -1000,12 +1016,12 @@ static ssize_t __write_ports_addxprt(char *buf)
if (err != 0)
return err;
- err = svc_create_xprt(nfsd_serv, transport,
+ err = svc_create_xprt(nfsd_serv, transport, &init_net,
PF_INET, port, SVC_SOCK_ANONYMOUS);
if (err < 0)
goto out_err;
- err = svc_create_xprt(nfsd_serv, transport,
+ err = svc_create_xprt(nfsd_serv, transport, &init_net,
PF_INET6, port, SVC_SOCK_ANONYMOUS);
if (err < 0 && err != -EAFNOSUPPORT)
goto out_close;
@@ -1356,6 +1372,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
{
static struct tree_descr nfsd_files[] = {
+#ifdef CONFIG_NFSD_DEPRECATED
[NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR},
[NFSD_Add] = {".add", &transaction_ops, S_IWUSR},
[NFSD_Del] = {".del", &transaction_ops, S_IWUSR},
@@ -1363,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR},
[NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
+#endif
[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
&export_features_operations, S_IRUGO},
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index b76ac3a82e39..6b641cf2c19a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -249,7 +249,7 @@ extern time_t nfsd4_grace;
#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
-#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */
+#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
/*
* The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e2c43464f237..2bae1d86f5f2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -16,6 +16,7 @@
#include <linux/lockd/bind.h>
#include <linux/nfsacl.h>
#include <linux/seq_file.h>
+#include <net/net_namespace.h>
#include "nfsd.h"
#include "cache.h"
#include "vfs.h"
@@ -186,12 +187,12 @@ static int nfsd_init_socks(int port)
if (!list_empty(&nfsd_serv->sv_permsocks))
return 0;
- error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
+ error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port,
SVC_SOCK_DEFAULTS);
if (error < 0)
return error;
- error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
+ error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port,
SVC_SOCK_DEFAULTS);
if (error < 0)
return error;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 322518c88e4b..39adc27b0685 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,6 +35,7 @@
#ifndef _NFSD4_STATE_H
#define _NFSD4_STATE_H
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/nfsd/nfsfh.h>
#include "nfsfh.h"
@@ -64,19 +65,12 @@ typedef struct {
(s)->si_fileid, \
(s)->si_generation
-struct nfsd4_cb_sequence {
- /* args/res */
- u32 cbs_minorversion;
- struct nfs4_client *cbs_clp;
-};
-
-struct nfs4_rpc_args {
- void *args_op;
- struct nfsd4_cb_sequence args_seq;
-};
-
struct nfsd4_callback {
- struct nfs4_rpc_args cb_args;
+ void *cb_op;
+ struct nfs4_client *cb_clp;
+ u32 cb_minorversion;
+ struct rpc_message cb_msg;
+ const struct rpc_call_ops *cb_ops;
struct work_struct cb_work;
};
@@ -91,7 +85,6 @@ struct nfs4_delegation {
u32 dl_type;
time_t dl_time;
/* For recall: */
- u32 dl_ident;
stateid_t dl_stateid;
struct knfsd_fh dl_fh;
int dl_retries;
@@ -103,8 +96,8 @@ struct nfs4_cb_conn {
/* SETCLIENTID info */
struct sockaddr_storage cb_addr;
size_t cb_addrlen;
- u32 cb_prog;
- u32 cb_minorversion;
+ u32 cb_prog; /* used only in 4.0 case;
+ per-session otherwise */
u32 cb_ident; /* minorversion 0 only */
struct svc_xprt *cb_xprt; /* minorversion 1 only */
};
@@ -160,6 +153,15 @@ struct nfsd4_clid_slot {
struct nfsd4_create_session sl_cr_ses;
};
+/*
+ * One transport connection associated with a session.  Entries are
+ * chained on the session's se_conns list through cn_persession and are
+ * looked up by their cn_xprt pointer.
+ */
+struct nfsd4_conn {
+ struct list_head cn_persession; /* link in the session's se_conns list */
+ struct svc_xprt *cn_xprt; /* transport this entry stands for */
+ struct svc_xpt_user cn_xpt_user; /* NOTE(review): presumably a per-transport user hook; confirm */
+ struct nfsd4_session *cn_session; /* NOTE(review): presumably the owning session; confirm */
+/* CDFC4_FORE, CDFC4_BACK: */
+ unsigned char cn_flags;
+};
+
struct nfsd4_session {
struct kref se_ref;
struct list_head se_hash; /* hash by sessionid */
@@ -169,6 +171,9 @@ struct nfsd4_session {
struct nfs4_sessionid se_sessionid;
struct nfsd4_channel_attrs se_fchannel;
struct nfsd4_channel_attrs se_bchannel;
+ struct list_head se_conns;
+ u32 se_cb_prog;
+ u32 se_cb_seq_nr;
struct nfsd4_slot *se_slots[]; /* forward channel slots */
};
@@ -221,24 +226,32 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_firststate; /* recovery dir creation */
+ u32 cl_minorversion;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
+#define NFSD4_CLIENT_CB_UPDATE 1
+#define NFSD4_CLIENT_KILL 2
+ unsigned long cl_cb_flags;
struct rpc_clnt *cl_cb_client;
+ u32 cl_cb_ident;
atomic_t cl_cb_set;
+ struct nfsd4_callback cl_cb_null;
+ struct nfsd4_session *cl_cb_session;
+
+ /* for all client information that callback code might need: */
+ spinlock_t cl_lock;
/* for nfs41 */
struct list_head cl_sessions;
struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
u32 cl_exchange_flags;
- struct nfs4_sessionid cl_sessionid;
/* number of rpc's in progress over an associated session: */
atomic_t cl_refcount;
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
unsigned long cl_cb_slot_busy;
- u32 cl_cb_seq_nr;
struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
/* wait here for slots */
};
@@ -440,12 +453,13 @@ extern int nfs4_in_grace(void);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
extern void nfs4_free_stateowner(struct kref *kref);
extern int set_callback_cred(void);
-extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
+extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
extern void nfsd4_do_callback_rpc(struct work_struct *);
extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
extern int nfsd4_create_callback_queue(void);
extern void nfsd4_destroy_callback_queue(void);
-extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *);
+extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfs4_put_delegation(struct nfs4_delegation *dp);
extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
extern void nfsd4_init_recdir(char *recdir_name);