From 606890c5871aec6f4974676404014dfd939141ec Mon Sep 17 00:00:00 2001
From: Kamal Charan <kcharan@vmware.com>
Date: Wed, 12 Dec 2018 02:43:51 -0800
Subject: [PATCH] Add 9p zero copy data path using crossfd

Add new RPC calls to the 9p protocol to allow zero-copy
read and write requests from the client to the server.
The client sends only the page addresses of the I/O
buffer where the data is to be transferred; the server
transfers the data directly to or from that buffer and
does not send or receive the data over the transport.

These calls are allowed only over the vsock transport,
where both the client and the server are on the same host
and the server can transfer the data to client memory using
the crossfd feature of ESXi.
---
 include/net/9p/9p.h     |   8 +
 include/net/9p/client.h |   6 +
 net/9p/client.c         | 469 ++++++++++++++++++++++++++++++++++++++----------
 net/9p/protocol.c       |  20 ++-
 4 files changed, 405 insertions(+), 98 deletions(-)

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 27dfe85..48dd4c4 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -109,6 +109,10 @@ void _p9_debug(enum p9_debug_flags level, const char *func,
  * @P9_RREAD: response with data requested
  * @P9_TWRITE: reuqest to transfer data to a file
  * @P9_RWRITE: response with out much data was transferred to file
+ * @P9_TREADX: request to zero copy data from a file to user buffer
+ * @P9_RREADX: response with how much data was transferred from file
+ * @P9_TWRITEX: request to zero copy data to a file from user buffer
+ * @P9_RWRITEX: response with how much data was transferred to file
  * @P9_TCLUNK: forget about a handle to an entity within the file system
  * @P9_RCLUNK: response when server has forgotten about the handle
  * @P9_TREMOVE: request to remove an entity from the hierarchy
@@ -168,6 +172,10 @@ enum p9_msg_t {
 	P9_RRENAMEAT,
 	P9_TUNLINKAT = 76,
 	P9_RUNLINKAT,
+	P9_TREADX = 96,
+	P9_RREADX,
+	P9_TWRITEX = 98,
+	P9_RWRITEX,
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index c6b97e5..fd2c189 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -35,12 +35,14 @@
  * @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u
  * @p9_proto_2000u: 9P2000.u extension
  * @p9_proto_2000L: 9P2000.L extension
+ * @p9_proto_2000X: 9P2000.X extension (dotx zero copy)
  */
 
 enum p9_proto_versions{
 	p9_proto_legacy,
 	p9_proto_2000u,
 	p9_proto_2000L,
+	p9_proto_2000X,
 };
 
 
@@ -124,6 +126,8 @@ struct p9_req_t {
  * struct p9_client - per client instance state
  * @lock: protect @fidlist
  * @msize: maximum data size negotiated by protocol
+ * @minzcpages: minimum number of pages for dotx zero copy
+ * @is_dotx_ok: whether zero copy io using dotx can be used
  * @dotu: extension flags negotiated by protocol
  * @proto_version: 9P protocol version to use
  * @trans_mod: module API instantiated with this client
@@ -152,6 +156,8 @@ struct p9_req_t {
 struct p9_client {
 	spinlock_t lock; /* protect client structure */
 	unsigned int msize;
+	unsigned int minzcpages;
+	unsigned int is_dotx_ok;
 	unsigned char proto_version;
 	struct p9_trans_module *trans_mod;
 	enum p9_trans_status status;
diff --git a/net/9p/client.c b/net/9p/client.c
index ed8738c4..cb08602 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -25,6 +25,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
@@ -44,6 +45,9 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/9p.h>
 
+#define DOTX_ZC_MAXPAGES 1024
+#define DOTX_ZC_TRANSPORT "vsock"
+
 /*
   * Client Option Parsing (code inspired by NFS code)
   *  - a little lazy - parse all client options
@@ -54,6 +58,7 @@ enum {
 	Opt_trans,
 	Opt_legacy,
 	Opt_version,
+	Opt_minzcpages,
 	Opt_err,
 };
 
@@ -62,12 +67,14 @@ static const match_table_t tokens = {
 	{Opt_legacy, "noextend"},
 	{Opt_trans, "trans=%s"},
 	{Opt_version, "version=%s"},
+	{Opt_minzcpages, "minzcpages=%d"},
 	{Opt_err, NULL},
 };
 
 inline int p9_is_proto_dotl(struct p9_client *clnt)
 {
-	return clnt->proto_version == p9_proto_2000L;
+	return clnt->proto_version == p9_proto_2000L ||
+	       clnt->proto_version == p9_proto_2000X;
 }
 EXPORT_SYMBOL(p9_is_proto_dotl);
 
@@ -77,6 +84,12 @@ inline int p9_is_proto_dotu(struct p9_client *clnt)
 }
 EXPORT_SYMBOL(p9_is_proto_dotu);
 
+inline int p9_is_proto_dotx(struct p9_client *clnt)
+{
+	return clnt->proto_version == p9_proto_2000X;
+}
+EXPORT_SYMBOL(p9_is_proto_dotx);
+
 /*
  * Some error codes are taken directly from the server replies,
  * make sure they are valid.
@@ -105,6 +118,9 @@ static int get_protocol_version(char *s)
 	} else if (!strcmp(s, "9p2000.L")) {
 		version = p9_proto_2000L;
 		p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n");
+	} else if (!strcmp(s, "9p2000.X")) {
+		version = p9_proto_2000X;
+		p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.X\n");
 	} else
 		pr_info("Unknown protocol version %s\n", s);
 
@@ -179,6 +195,16 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 		case Opt_legacy:
 			clnt->proto_version = p9_proto_legacy;
 			break;
+		case Opt_minzcpages:
+			r = match_int(&args[0], &option);
+			if (r < 0 || option < 0) {
+				p9_debug(P9_DEBUG_ERROR,
+					 "integer field, but no/negative integer?\n");
+				ret = r < 0 ? r : -EINVAL; /* r == 0: value was negative */
+				continue;
+			}
+			clnt->minzcpages = option;
+			break;
 		case Opt_version:
 			s = match_strdup(&args[0]);
 			if (!s) {
@@ -199,6 +225,16 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 			continue;
 		}
 	}
+	/* The dotx zero copy calls are only usable over DOTX_ZC_TRANSPORT. */
+	if (p9_is_proto_dotx(clnt) &&
+	    (!clnt->trans_mod ||
+	     strcmp(clnt->trans_mod->name, DOTX_ZC_TRANSPORT))) {
+		p9_debug(P9_DEBUG_ERROR,
+			 "dotx version requires %s transport\n",
+			 DOTX_ZC_TRANSPORT);
+		ret = -EINVAL;
+	}
+
 
 free_and_return:
 	kfree(tmp_options);
@@ -938,6 +974,10 @@ static int p9_client_version(struct p9_client *c)
 		 c->msize, c->proto_version);
 
 	switch (c->proto_version) {
+	case p9_proto_2000X:
+		req = p9_client_rpc(c, P9_TVERSION, "ds",
+					c->msize, "9P2000.X");
+		break;
 	case p9_proto_2000L:
 		req = p9_client_rpc(c, P9_TVERSION, "ds",
 					c->msize, "9P2000.L");
@@ -965,7 +1005,9 @@ static int p9_client_version(struct p9_client *c)
 	}
 
 	p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
-	if (!strncmp(version, "9P2000.L", 8))
+	if (!strncmp(version, "9P2000.X", 8))
+		c->proto_version = p9_proto_2000X;
+	else if (!strncmp(version, "9P2000.L", 8))
 		c->proto_version = p9_proto_2000L;
 	else if (!strncmp(version, "9P2000.u", 8))
 		c->proto_version = p9_proto_2000u;
@@ -999,6 +1041,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 
 	clnt->trans_mod = NULL;
 	clnt->trans = NULL;
+	clnt->is_dotx_ok = 0;
+	clnt->minzcpages = 1;
 
 	client_id = utsname()->nodename;
 	memcpy(clnt->name, client_id, strlen(client_id) + 1);
@@ -1534,136 +1578,368 @@ error:
 }
 EXPORT_SYMBOL(p9_client_unlinkat);
 
+static int
+dotx_can_zc(const struct iov_iter *iter, struct p9_client *clnt)
+{
+	return iter_is_iovec(iter) &&
+	       clnt->is_dotx_ok &&
+	       iov_iter_count(iter) >= clnt->minzcpages * PAGE_SIZE;
+}
+
+static int
+p9_client_read_nodotx(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
+{
+	struct p9_client *clnt = fid->clnt;
+	struct p9_req_t *req;
+	int total = 0;
+	int count = iov_iter_count(to);
+	int rsize, non_zc = 0;
+	char *dataptr;
+
+	*err = 0;
+
+	rsize = fid->iounit;
+	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
+		rsize = clnt->msize - P9_IOHDRSZ;
+
+	if (count < rsize)
+		rsize = count;
+
+	/* Don't bother zerocopy for small IO (< 1024) */
+	if (clnt->trans_mod->zc_request && rsize > 1024) {
+		/*
+		 * response header len is 11
+		 * PDU Header(7) + IO Size (4)
+		 */
+		req = p9_client_zc_rpc(clnt, P9_TREAD, to, NULL, rsize,
+				       0, 11, "dqd", fid->fid, offset, rsize);
+	} else {
+		non_zc = 1;
+		req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
+				    rsize);
+	}
+
+	if (IS_ERR(req)) {
+		p9_debug(P9_DEBUG_9P, "          rpc error\n");
+		*err = PTR_ERR(req);
+		return 0;
+	}
+
+	*err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
+	if (*err) {
+		p9_debug(P9_DEBUG_9P, "          ret err %d \n", *err);
+		trace_9p_protocol_dump(clnt, req->rc);
+		goto error;
+	}
+	if (rsize < count) {
+		pr_err("bogus RREAD count (%d > %d)\n", count, rsize);
+		count = rsize;
+	}
+
+	if (count == 0)
+		goto error;
+
+	if (non_zc) {
+		int n = copy_to_iter(dataptr, count, to);
+		if (n != count) {
+			*err = -EFAULT;
+			total = n;
+			goto error;
+		}
+	} else {
+		iov_iter_advance(to, count);
+	}
+	total = count;
+
+error:
+	p9_free_req(clnt, req);
+	return total;
+}
+
+/*
+ * Issue a single TREADX for up to DOTX_ZC_MAXPAGES pages of @to: take
+ * references on the pages backing the iterator and send their page frame
+ * numbers, so the server can fill them without using the transport.
+ * Returns the count reported by the server, or 0 with *@err set on failure.
+ */
+static int
+p9_client_read_dotx(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
+{
+	struct p9_client *clnt = fid->clnt;
+	struct p9_req_t *req = NULL;
+	int total = 0;
+	int count = iov_iter_count(to);
+	int rsize;
+	size_t off;
+	int i, maxpages, npages = 0;
+	struct page **pages = NULL;
+	u64 *ppns = NULL;	/* the 'p' pdu format reads 64-bit entries */
+
+	*err = 0;
+
+	maxpages = DIV_ROUND_UP(count, PAGE_SIZE);
+	maxpages = min(DOTX_ZC_MAXPAGES, maxpages);
+
+	pages = kmalloc_array(maxpages, sizeof(*pages), GFP_KERNEL);
+	ppns = kmalloc_array(maxpages, sizeof(*ppns), GFP_KERNEL);
+	if (!pages || !ppns) {
+		*err = -ENOMEM;
+		goto error;
+	}
+
+	rsize = iov_iter_get_pages(to, pages, count, maxpages, &off);
+	if (rsize < 0) {
+		*err = rsize;
+		goto error;
+	}
+
+	npages = DIV_ROUND_UP(off + rsize, PAGE_SIZE);
+	for (i = 0; i < npages; i++)
+		ppns[i] = page_to_pfn(pages[i]);
+
+	req = p9_client_rpc(clnt, P9_TREADX, "dqddp", fid->fid, offset, rsize,
+			    (unsigned int) off, npages, ppns);
+	if (IS_ERR(req)) {
+		p9_debug(P9_DEBUG_9P, "          rpc error\n");
+		*err = PTR_ERR(req);
+		goto error;
+	}
+
+	*err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
+	if (*err) {
+		p9_debug(P9_DEBUG_9P, "          ret err %d \n", *err);
+		trace_9p_protocol_dump(clnt, req->rc);
+		goto error;
+	}
+	if (rsize < count) {
+		pr_err("bogus RREADX count (%d > %d)\n", count, rsize);
+		*err = -EIO;	/* do not let the caller mistake this for EOF */
+		goto error;
+	}
+
+	if (count == 0)
+		goto error;
+
+	iov_iter_advance(to, count);
+	total = count;
+
+error:
+	if (req && !IS_ERR(req))
+		p9_free_req(clnt, req);
+
+	for (i = 0; i < npages; i++)
+		put_page(pages[i]);
+
+	kfree(pages);
+	kfree(ppns);
+
+	return total;
+}
+
 int
 p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
 {
-	struct p9_client *clnt = fid->clnt;
-	struct p9_req_t *req;
 	int total = 0;
+	int is_dotx = p9_is_proto_dotx(fid->clnt);
+
 	*err = 0;
 
-	p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
-		   fid->fid, (unsigned long long) offset, (int)iov_iter_count(to));
+	p9_debug(P9_DEBUG_9P, ">>> TREAD  fid %d offset %llu count %zd\n",
+		 fid->fid, (unsigned long long) offset, iov_iter_count(to));
 
 	while (iov_iter_count(to)) {
-		int count = iov_iter_count(to);
-		int rsize, non_zc = 0;
-		char *dataptr;
-			
-		rsize = fid->iounit;
-		if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
-			rsize = clnt->msize - P9_IOHDRSZ;
+		int count;
 
-		if (count < rsize)
-			rsize = count;
-
-		/* Don't bother zerocopy for small IO (< 1024) */
-		if (clnt->trans_mod->zc_request && rsize > 1024) {
-			/*
-			 * response header len is 11
-			 * PDU Header(7) + IO Size (4)
-			 */
-			req = p9_client_zc_rpc(clnt, P9_TREAD, to, NULL, rsize,
-					       0, 11, "dqd", fid->fid,
-					       offset, rsize);
+		if (is_dotx && dotx_can_zc(to, fid->clnt)) {
+			count = p9_client_read_dotx(fid, offset, to, err);
+			if (*err == -ENXIO) {
+				pr_warn("Disabling dotx: No zero copy device\n");
+				fid->clnt->is_dotx_ok = 0;
+				continue;
+			}
 		} else {
-			non_zc = 1;
-			req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
-					    rsize);
-		}
-		if (IS_ERR(req)) {
-			*err = PTR_ERR(req);
-			break;
+			count = p9_client_read_nodotx(fid, offset, to, err);
+			if (is_dotx && *err == -ENXIO) {
+				pr_warn("Enabling dotx: Zero copy device available\n");
+				fid->clnt->is_dotx_ok = 1;
+				continue;
+			}
 		}
 
-		*err = p9pdu_readf(req->rc, clnt->proto_version,
-				   "D", &count, &dataptr);
-		if (*err) {
-			trace_9p_protocol_dump(clnt, req->rc);
-			p9_free_req(clnt, req);
+		if (*err || count == 0)
 			break;
-		}
-		if (rsize < count) {
-			pr_err("bogus RREAD count (%d > %d)\n", count, rsize);
-			count = rsize;
-		}
 
-		p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
-		if (!count) {
-			p9_free_req(clnt, req);
-			break;
-		}
+		p9_debug(P9_DEBUG_9P, "<<< RREAD  count %d\n", count);
 
-		if (non_zc) {
-			int n = copy_to_iter(dataptr, count, to);
-			total += n;
-			offset += n;
-			if (n != count) {
-				*err = -EFAULT;
-				p9_free_req(clnt, req);
-				break;
-			}
-		} else {
-			iov_iter_advance(to, count);
-			total += count;
-			offset += count;
-		}
-		p9_free_req(clnt, req);
+		total += count;
+		offset += count;
 	}
 	return total;
 }
 EXPORT_SYMBOL(p9_client_read);
 
+static int
+p9_client_write_nodotx(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
+{
+	struct p9_client *clnt = fid->clnt;
+	struct p9_req_t *req;
+	int total = 0;
+	int count = iov_iter_count(from);
+	int rsize;
+
+	*err = 0;
+
+	rsize = fid->iounit;
+	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
+		rsize = clnt->msize - P9_IOHDRSZ;
+
+	if (count < rsize)
+		rsize = count;
+
+	/* Don't bother zerocopy for small IO (< 1024) */
+	if (clnt->trans_mod->zc_request && rsize > 1024) {
+		req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0, rsize,
+				       P9_ZC_HDR_SZ, "dqd", fid->fid, offset,
+				       rsize);
+	} else {
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid, offset,
+				    rsize, from);
+	}
+	if (IS_ERR(req)) {
+		p9_debug(P9_DEBUG_9P, "          rpc error\n");
+		*err = PTR_ERR(req);
+		return 0;
+	}
+
+	*err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
+	if (*err) {
+		p9_debug(P9_DEBUG_9P, "          ret err %d \n", *err);
+		trace_9p_protocol_dump(clnt, req->rc);
+		goto error;
+	}
+	if (rsize < count) {
+		pr_err("bogus RWRITE count (%d > %d)\n", count, rsize);
+		count = rsize;
+	}
+
+	iov_iter_advance(from, count);
+	total = count;
+error:
+	p9_free_req(clnt, req);
+
+	return total;
+}
+
+/*
+ * Issue a single TWRITEX for up to DOTX_ZC_MAXPAGES pages of @from: take
+ * references on the pages backing the iterator and send their page frame
+ * numbers, so the server can read the data without using the transport.
+ * Returns the count reported by the server, or 0 with *@err set on failure.
+ */
+static int
+p9_client_write_dotx(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
+{
+	struct p9_client *clnt = fid->clnt;
+	struct p9_req_t *req = NULL;
+	int total = 0;
+	int count = iov_iter_count(from);
+	int rsize;
+	size_t off;
+	int i, maxpages, npages = 0;
+	struct page **pages = NULL;
+	u64 *ppns = NULL;	/* the 'p' pdu format reads 64-bit entries */
+
+	*err = 0;
+
+	maxpages = DIV_ROUND_UP(count, PAGE_SIZE);
+	maxpages = min(DOTX_ZC_MAXPAGES, maxpages);
+
+	pages = kmalloc_array(maxpages, sizeof(*pages), GFP_KERNEL);
+	ppns = kmalloc_array(maxpages, sizeof(*ppns), GFP_KERNEL);
+	if (!pages || !ppns) {
+		*err = -ENOMEM;
+		goto error;
+	}
+	rsize = iov_iter_get_pages(from, pages, count, maxpages, &off);
+	if (rsize < 0) {
+		*err = rsize;
+		goto error;
+	}
+
+	npages = DIV_ROUND_UP(off + rsize, PAGE_SIZE);
+	for (i = 0; i < npages; i++)
+		ppns[i] = page_to_pfn(pages[i]);
+
+	req = p9_client_rpc(clnt, P9_TWRITEX, "dqddp", fid->fid, offset, rsize,
+			    (unsigned int) off, npages, ppns);
+	if (IS_ERR(req)) {
+		p9_debug(P9_DEBUG_9P, "          rpc error\n");
+		*err = PTR_ERR(req);
+		goto error;
+	}
+
+	*err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
+	if (*err) {
+		p9_debug(P9_DEBUG_9P, "          ret err %d \n", *err);
+		trace_9p_protocol_dump(clnt, req->rc);
+		goto error;
+	}
+	if (rsize < count) {
+		pr_err("bogus RWRITEX count (%d > %d)\n", count, rsize);
+		*err = -EIO;	/* a silent 0-byte write would hide the failure */
+		goto error;
+	}
+
+	iov_iter_advance(from, count);
+	total = count;
+error:
+	if (req && !IS_ERR(req))
+		p9_free_req(clnt, req);
+
+	for (i = 0; i < npages; i++)
+		put_page(pages[i]);
+
+	kfree(pages);
+	kfree(ppns);
+
+	return total;
+}
+
 int
 p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
 {
-	struct p9_client *clnt = fid->clnt;
-	struct p9_req_t *req;
 	int total = 0;
+	int is_dotx = p9_is_proto_dotx(fid->clnt);
+
 	*err = 0;
 
 	p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
-				fid->fid, (unsigned long long) offset,
-				iov_iter_count(from));
+		 fid->fid, (unsigned long long) offset, iov_iter_count(from));
 
 	while (iov_iter_count(from)) {
-		int count = iov_iter_count(from);
-		int rsize = fid->iounit;
-		if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
-			rsize = clnt->msize - P9_IOHDRSZ;
+		int count;
 
-		if (count < rsize)
-			rsize = count;
-
-		/* Don't bother zerocopy for small IO (< 1024) */
-		if (clnt->trans_mod->zc_request && rsize > 1024) {
-			req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0,
-					       rsize, P9_ZC_HDR_SZ, "dqd",
-					       fid->fid, offset, rsize);
+		if (is_dotx && dotx_can_zc(from, fid->clnt)) {
+			count = p9_client_write_dotx(fid, offset, from, err);
+			if (*err == -ENXIO) {
+				pr_warn("Disabling dotx: No zero copy device\n");
+				fid->clnt->is_dotx_ok = 0;
+				continue;
+			}
 		} else {
-			req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
-						    offset, rsize, from);
-		}
-		if (IS_ERR(req)) {
-			*err = PTR_ERR(req);
-			break;
+			count = p9_client_write_nodotx(fid, offset, from, err);
+			if (is_dotx && *err == -ENXIO) {
+				pr_warn("Enabling dotx: Zero copy device available\n");
+				fid->clnt->is_dotx_ok = 1;
+				continue;
+			}
 		}
 
-		*err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
-		if (*err) {
-			trace_9p_protocol_dump(clnt, req->rc);
-			p9_free_req(clnt, req);
+		if (*err || count == 0)
 			break;
-		}
-		if (rsize < count) {
-			pr_err("bogus RWRITE count (%d > %d)\n", count, rsize);
-			count = rsize;
-		}
 
 		p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
 
-		p9_free_req(clnt, req);
-		iov_iter_advance(from, count);
 		total += count;
 		offset += count;
 	}
@@ -1803,7 +2079,8 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
 		ret += strlen(wst->muid);
 
 	if ((proto_version == p9_proto_2000u) ||
-		(proto_version == p9_proto_2000L)) {
+		(proto_version == p9_proto_2000L) ||
+		(proto_version == p9_proto_2000X)) {
 		ret += 2+4+4+4;	/* extension[s] n_uid[4] n_gid[4] n_muid[4] */
 		if (wst->extension)
 			ret += strlen(wst->extension);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 145f805..62b0cf9 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -346,7 +346,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			break;
 		case '?':
 			if ((proto_version != p9_proto_2000u) &&
-				(proto_version != p9_proto_2000L))
+				(proto_version != p9_proto_2000L) &&
+				(proto_version != p9_proto_2000X))
 				return 0;
 			break;
 		default:
@@ -454,6 +455,20 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					errcode = -EFAULT;
 			}
 			break;
+		case 'p':{
+				int j;
+				uint32_t npages = va_arg(ap, uint32_t);
+				uint64_t *ppns = va_arg(ap, uint64_t *);
+				errcode =
+				    p9pdu_writef(pdu, proto_version, "d",
+								 npages);
+				for (j = 0; !errcode && j < npages; j++) {
+					errcode =
+					    p9pdu_writef(pdu, proto_version,
+							 "q", ppns[j]);
+				}
+			}
+			break;
 		case 'T':{
 				uint16_t nwname = va_arg(ap, int);
 				const char **wnames = va_arg(ap, const char **);
@@ -516,7 +531,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			break;
 		case '?':
 			if ((proto_version != p9_proto_2000u) &&
-				(proto_version != p9_proto_2000L))
+				(proto_version != p9_proto_2000L) &&
+				(proto_version != p9_proto_2000X))
 				return 0;
 			break;
 		default:
-- 
2.6.2