Browse code

Add HyperV specific patches in linux

Change-Id: Ia722bd19468230ecc5c3d5f4946b202528ff5b83
Reviewed-on: http://photon-jenkins.eng.vmware.com:8082/3340
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Tested-by: gerrit-photon <photon-checkins@vmware.com>

suezzelur authored on 2017/07/26 08:39:42
Showing 15 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,30 @@
0
+From b85149ae2f7a39acd91f8847bfc5a7a188681681 Mon Sep 17 00:00:00 2001
1
+From: Rolf Neugebauer <rolf.neugebauer@gmail.com>
2
+Date: Mon, 23 May 2016 18:55:45 +0100
3
+Subject: [PATCH 04/13] vmbus: Don't spam the logs with unknown GUIDs
4
+
5
+With Hyper-V sockets, device types are introduced on the fly. The pr_info()
6
+then prints a message on every connection, which is way too verbose.  Since
7
+there doesn't seem to be an easy way to check for registered services,
8
+disable the pr_info() completely.
9
+
10
+Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
11
+---
12
+ drivers/hv/channel_mgmt.c | 1 -
13
+ 1 file changed, 1 deletion(-)
14
+
15
+diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
16
+index d8bc4b910192..8df02f3ca0b2 100644
17
+--- a/drivers/hv/channel_mgmt.c
18
+@@ -192,7 +192,6 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel)
19
+ 		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
20
+ 			return i;
21
+ 	}
22
+-	pr_info("Unknown GUID: %pUl\n", guid);
23
+ 	return i;
24
+ }
25
+ 
26
+-- 
27
+2.13.0
28
+
0 29
new file mode 100644
... ...
@@ -0,0 +1,48 @@
0
+From f6107bf72b7891d10d1d56bdf5316bfd91c177dc Mon Sep 17 00:00:00 2001
1
+From: Alex Ng <alexng@messages.microsoft.com>
2
+Date: Sun, 6 Nov 2016 13:14:07 -0800
3
+Subject: [PATCH 05/13] Drivers: hv: utils: Fix the mapping between host
4
+ version and protocol to use
5
+
6
+We should intentionally declare the protocols to use for every known host
7
+and default to using the latest protocol if the host is unknown or new.
8
+
9
+Signed-off-by: Alex Ng <alexng@microsoft.com>
10
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
11
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
12
+Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
13
+(cherry picked from commit 3da0401b4d0e17aea7526db0235d98fa535d903e)
14
+---
15
+ drivers/hv/hv_util.c | 9 ++++++---
16
+ 1 file changed, 6 insertions(+), 3 deletions(-)
17
+
18
+diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
19
+index bcd06306f3e8..e7707747f56d 100644
20
+--- a/drivers/hv/hv_util.c
21
+@@ -389,16 +389,19 @@ static int util_probe(struct hv_device *dev,
22
+ 		ts_srv_version = TS_VERSION_1;
23
+ 		hb_srv_version = HB_VERSION_1;
24
+ 		break;
25
+-	case(VERSION_WIN10):
26
++	case VERSION_WIN7:
27
++	case VERSION_WIN8:
28
++	case VERSION_WIN8_1:
29
+ 		util_fw_version = UTIL_FW_VERSION;
30
+ 		sd_srv_version = SD_VERSION;
31
+-		ts_srv_version = TS_VERSION;
32
++		ts_srv_version = TS_VERSION_3;
33
+ 		hb_srv_version = HB_VERSION;
34
+ 		break;
35
++	case VERSION_WIN10:
36
+ 	default:
37
+ 		util_fw_version = UTIL_FW_VERSION;
38
+ 		sd_srv_version = SD_VERSION;
39
+-		ts_srv_version = TS_VERSION_3;
40
++		ts_srv_version = TS_VERSION;
41
+ 		hb_srv_version = HB_VERSION;
42
+ 	}
43
+ 
44
+-- 
45
+2.13.0
46
+
0 47
new file mode 100644
... ...
@@ -0,0 +1,105 @@
0
+From b1fa05201756d5ce898c4ec183737041284624c7 Mon Sep 17 00:00:00 2001
1
+From: Alex Ng <alexng@messages.microsoft.com>
2
+Date: Sun, 6 Nov 2016 13:14:10 -0800
3
+Subject: [PATCH 06/13] Drivers: hv: vss: Improve log messages.
4
+
5
+Adding log messages to help troubleshoot error cases and transaction
6
+handling.
7
+
8
+Signed-off-by: Alex Ng <alexng@microsoft.com>
9
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
10
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
11
+Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
12
+(cherry picked from commit 23d2cc0c29eb0e7c6fe4cac88098306c31c40208)
13
+---
14
+ drivers/hv/hv_snapshot.c | 25 +++++++++++++++++++------
15
+ 1 file changed, 19 insertions(+), 6 deletions(-)
16
+
17
+diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
18
+index a76e3db0d01f..b1446d51ef45 100644
19
+--- a/drivers/hv/hv_snapshot.c
20
+@@ -121,7 +121,7 @@ static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
21
+ 	default:
22
+ 		return -EINVAL;
23
+ 	}
24
+-	pr_debug("VSS: userspace daemon ver. %d connected\n", dm_reg_value);
25
++	pr_info("VSS: userspace daemon ver. %d connected\n", dm_reg_value);
26
+ 	return 0;
27
+ }
28
+ 
29
+@@ -129,8 +129,10 @@ static int vss_on_msg(void *msg, int len)
30
+ {
31
+ 	struct hv_vss_msg *vss_msg = (struct hv_vss_msg *)msg;
32
+ 
33
+-	if (len != sizeof(*vss_msg))
34
++	if (len != sizeof(*vss_msg)) {
35
++		pr_debug("VSS: Message size does not match length\n");
36
+ 		return -EINVAL;
37
++	}
38
+ 
39
+ 	if (vss_msg->vss_hdr.operation == VSS_OP_REGISTER ||
40
+ 	    vss_msg->vss_hdr.operation == VSS_OP_REGISTER1) {
41
+@@ -138,8 +140,11 @@ static int vss_on_msg(void *msg, int len)
42
+ 		 * Don't process registration messages if we're in the middle
43
+ 		 * of a transaction processing.
44
+ 		 */
45
+-		if (vss_transaction.state > HVUTIL_READY)
46
++		if (vss_transaction.state > HVUTIL_READY) {
47
++			pr_debug("VSS: Got unexpected registration request\n");
48
+ 			return -EINVAL;
49
++		}
50
++
51
+ 		return vss_handle_handshake(vss_msg);
52
+ 	} else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) {
53
+ 		vss_transaction.state = HVUTIL_USERSPACE_RECV;
54
+@@ -156,7 +161,7 @@ static int vss_on_msg(void *msg, int len)
55
+ 		}
56
+ 	} else {
57
+ 		/* This is a spurious call! */
58
+-		pr_warn("VSS: Transaction not active\n");
59
++		pr_debug("VSS: Transaction not active\n");
60
+ 		return -EINVAL;
61
+ 	}
62
+ 	return 0;
63
+@@ -169,8 +174,10 @@ static void vss_send_op(void)
64
+ 	struct hv_vss_msg *vss_msg;
65
+ 
66
+ 	/* The transaction state is wrong. */
67
+-	if (vss_transaction.state != HVUTIL_HOSTMSG_RECEIVED)
68
++	if (vss_transaction.state != HVUTIL_HOSTMSG_RECEIVED) {
69
++		pr_debug("VSS: Unexpected attempt to send to daemon\n");
70
+ 		return;
71
++	}
72
+ 
73
+ 	vss_msg = kzalloc(sizeof(*vss_msg), GFP_KERNEL);
74
+ 	if (!vss_msg)
75
+@@ -211,9 +218,13 @@ static void vss_handle_request(struct work_struct *dummy)
76
+ 	case VSS_OP_HOT_BACKUP:
77
+ 		if (vss_transaction.state < HVUTIL_READY) {
78
+ 			/* Userspace is not registered yet */
79
++			pr_debug("VSS: Not ready for request.\n");
80
+ 			vss_respond_to_host(HV_E_FAIL);
81
+ 			return;
82
+ 		}
83
++
84
++		pr_debug("VSS: Received request for op code: %d\n",
85
++			vss_transaction.msg->vss_hdr.operation);
86
+ 		vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
87
+ 		vss_send_op();
88
+ 		return;
89
+@@ -356,8 +367,10 @@ hv_vss_init(struct hv_util_service *srv)
90
+ 
91
+ 	hvt = hvutil_transport_init(vss_devname, CN_VSS_IDX, CN_VSS_VAL,
92
+ 				    vss_on_msg, vss_on_reset);
93
+-	if (!hvt)
94
++	if (!hvt) {
95
++		pr_warn("VSS: Failed to initialize transport\n");
96
+ 		return -EFAULT;
97
++	}
98
+ 
99
+ 	return 0;
100
+ }
101
+-- 
102
+2.13.0
103
+
0 104
new file mode 100644
... ...
@@ -0,0 +1,48 @@
0
+From 1b1406809c1c2fdac0cb1fd2d85400dee09f2887 Mon Sep 17 00:00:00 2001
1
+From: Alex Ng <alexng@messages.microsoft.com>
2
+Date: Sun, 6 Nov 2016 13:14:11 -0800
3
+Subject: [PATCH 07/13] Drivers: hv: vss: Operation timeouts should match host
4
+ expectation
5
+
6
+Increase the timeout of backup operations. When system is under I/O load,
7
+it needs more time to freeze. These timeout values should also match the
8
+host timeout values more closely.
9
+
10
+Signed-off-by: Alex Ng <alexng@microsoft.com>
11
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
12
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
13
+Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
14
+(cherry picked from commit b357fd3908c1191f2f56e38aa77f2aecdae18bc8)
15
+---
16
+ drivers/hv/hv_snapshot.c | 8 ++++++--
17
+ 1 file changed, 6 insertions(+), 2 deletions(-)
18
+
19
+diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
20
+index b1446d51ef45..4e543dbb731a 100644
21
+--- a/drivers/hv/hv_snapshot.c
22
+@@ -31,7 +31,10 @@
23
+ #define VSS_MINOR  0
24
+ #define VSS_VERSION    (VSS_MAJOR << 16 | VSS_MINOR)
25
+ 
26
+-#define VSS_USERSPACE_TIMEOUT (msecs_to_jiffies(10 * 1000))
27
++/*
28
++ * Timeout values are based on expecations from host
29
++ */
30
++#define VSS_FREEZE_TIMEOUT (15 * 60)
31
+ 
32
+ /*
33
+  * Global state maintained for transaction that is being processed. For a class
34
+@@ -187,7 +190,8 @@ static void vss_send_op(void)
35
+ 
36
+ 	vss_transaction.state = HVUTIL_USERSPACE_REQ;
37
+ 
38
+-	schedule_delayed_work(&vss_timeout_work, VSS_USERSPACE_TIMEOUT);
39
++	schedule_delayed_work(&vss_timeout_work, op == VSS_OP_FREEZE ?
40
++			VSS_FREEZE_TIMEOUT * HZ : HV_UTIL_TIMEOUT * HZ);
41
+ 
42
+ 	rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
43
+ 	if (rc) {
44
+-- 
45
+2.13.0
46
+
0 47
new file mode 100644
... ...
@@ -0,0 +1,492 @@
0
+From 66955115f0d8764a93c4ec9291d917d4a8be3e8f Mon Sep 17 00:00:00 2001
1
+From: Alex Ng <alexng@messages.microsoft.com>
2
+Date: Sat, 28 Jan 2017 12:37:17 -0700
3
+Subject: [PATCH 08/13] Drivers: hv: vmbus: Use all supported IC versions to
4
+ negotiate
5
+
6
+Previously, we were assuming that each IC protocol version was tied to a
7
+specific host version. For example, some Windows 10 preview hosts only
8
+support v3 TimeSync even though driver assumes v4 is supported by all
9
+Windows 10 hosts.
10
+
11
+The guest will stop trying to negotiate even though older supported
12
+versions may still be offered by the host.
13
+
14
+Make IC version negotiation more robust by going through all versions
15
+that are supported by the guest.
16
+
17
+Fixes: 3da0401b4d0e ("Drivers: hv: utils: Fix the mapping between host
18
+version and protocol to use")
19
+
20
+Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
21
+Signed-off-by: Alex Ng <alexng@messages.microsoft.com>
22
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
23
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
24
+Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
25
+(cherry picked from commit a1656454131880980bc3a5313c8bf66ef5990c91)
26
+---
27
+ drivers/hv/channel_mgmt.c | 80 +++++++++++++++++++++++++++-------------
28
+ drivers/hv/hv_fcopy.c     | 20 +++++++---
29
+ drivers/hv/hv_kvp.c       | 41 +++++++++------------
30
+ drivers/hv/hv_snapshot.c  | 18 +++++++--
31
+ drivers/hv/hv_util.c      | 94 +++++++++++++++++++++++++----------------------
32
+ include/linux/hyperv.h    |  7 ++--
33
+ 6 files changed, 154 insertions(+), 106 deletions(-)
34
+
35
+diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
36
+index 8df02f3ca0b2..e7949b64bfbc 100644
37
+--- a/drivers/hv/channel_mgmt.c
38
+@@ -202,33 +202,34 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel)
39
+  * @buf: Raw buffer channel data
40
+  *
41
+  * @icmsghdrp is of type &struct icmsg_hdr.
42
+- * @negop is of type &struct icmsg_negotiate.
43
+  * Set up and fill in default negotiate response message.
44
+  *
45
+- * The fw_version specifies the  framework version that
46
+- * we can support and srv_version specifies the service
47
+- * version we can support.
48
++ * The fw_version and fw_vercnt specifies the framework version that
49
++ * we can support.
50
++ *
51
++ * The srv_version and srv_vercnt specifies the service
52
++ * versions we can support.
53
++ *
54
++ * Versions are given in decreasing order.
55
++ *
56
++ * nego_fw_version and nego_srv_version store the selected protocol versions.
57
+  *
58
+  * Mainly used by Hyper-V drivers.
59
+  */
60
+ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
61
+-				struct icmsg_negotiate *negop, u8 *buf,
62
+-				int fw_version, int srv_version)
63
++				u8 *buf, const int *fw_version, int fw_vercnt,
64
++				const int *srv_version, int srv_vercnt,
65
++				int *nego_fw_version, int *nego_srv_version)
66
+ {
67
+ 	int icframe_major, icframe_minor;
68
+ 	int icmsg_major, icmsg_minor;
69
+ 	int fw_major, fw_minor;
70
+ 	int srv_major, srv_minor;
71
+-	int i;
72
++	int i, j;
73
+ 	bool found_match = false;
74
++	struct icmsg_negotiate *negop;
75
+ 
76
+ 	icmsghdrp->icmsgsize = 0x10;
77
+-	fw_major = (fw_version >> 16);
78
+-	fw_minor = (fw_version & 0xFFFF);
79
+-
80
+-	srv_major = (srv_version >> 16);
81
+-	srv_minor = (srv_version & 0xFFFF);
82
+-
83
+ 	negop = (struct icmsg_negotiate *)&buf[
84
+ 		sizeof(struct vmbuspipe_hdr) +
85
+ 		sizeof(struct icmsg_hdr)];
86
+@@ -244,13 +245,22 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
87
+ 	 * support.
88
+ 	 */
89
+ 
90
+-	for (i = 0; i < negop->icframe_vercnt; i++) {
91
+-		if ((negop->icversion_data[i].major == fw_major) &&
92
+-		   (negop->icversion_data[i].minor == fw_minor)) {
93
+-			icframe_major = negop->icversion_data[i].major;
94
+-			icframe_minor = negop->icversion_data[i].minor;
95
+-			found_match = true;
96
++	for (i = 0; i < fw_vercnt; i++) {
97
++		fw_major = (fw_version[i] >> 16);
98
++		fw_minor = (fw_version[i] & 0xFFFF);
99
++
100
++		for (j = 0; j < negop->icframe_vercnt; j++) {
101
++			if ((negop->icversion_data[j].major == fw_major) &&
102
++			    (negop->icversion_data[j].minor == fw_minor)) {
103
++				icframe_major = negop->icversion_data[j].major;
104
++				icframe_minor = negop->icversion_data[j].minor;
105
++				found_match = true;
106
++				break;
107
++			}
108
+ 		}
109
++
110
++		if (found_match)
111
++			break;
112
+ 	}
113
+ 
114
+ 	if (!found_match)
115
+@@ -258,14 +268,26 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
116
+ 
117
+ 	found_match = false;
118
+ 
119
+-	for (i = negop->icframe_vercnt;
120
+-		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
121
+-		if ((negop->icversion_data[i].major == srv_major) &&
122
+-		   (negop->icversion_data[i].minor == srv_minor)) {
123
+-			icmsg_major = negop->icversion_data[i].major;
124
+-			icmsg_minor = negop->icversion_data[i].minor;
125
+-			found_match = true;
126
++	for (i = 0; i < srv_vercnt; i++) {
127
++		srv_major = (srv_version[i] >> 16);
128
++		srv_minor = (srv_version[i] & 0xFFFF);
129
++
130
++		for (j = negop->icframe_vercnt;
131
++			(j < negop->icframe_vercnt + negop->icmsg_vercnt);
132
++			j++) {
133
++
134
++			if ((negop->icversion_data[j].major == srv_major) &&
135
++				(negop->icversion_data[j].minor == srv_minor)) {
136
++
137
++				icmsg_major = negop->icversion_data[j].major;
138
++				icmsg_minor = negop->icversion_data[j].minor;
139
++				found_match = true;
140
++				break;
141
++			}
142
+ 		}
143
++
144
++		if (found_match)
145
++			break;
146
+ 	}
147
+ 
148
+ 	/*
149
+@@ -282,6 +304,12 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
150
+ 		negop->icmsg_vercnt = 1;
151
+ 	}
152
+ 
153
++	if (nego_fw_version)
154
++		*nego_fw_version = (icframe_major << 16) | icframe_minor;
155
++
156
++	if (nego_srv_version)
157
++		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;
158
++
159
+ 	negop->icversion_data[0].major = icframe_major;
160
+ 	negop->icversion_data[0].minor = icframe_minor;
161
+ 	negop->icversion_data[1].major = icmsg_major;
162
+diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
163
+index e47d8c9db03a..0a315e6aa589 100644
164
+--- a/drivers/hv/hv_fcopy.c
165
+@@ -31,6 +31,16 @@
166
+ #define WIN8_SRV_MINOR		1
167
+ #define WIN8_SRV_VERSION	(WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
168
+ 
169
++#define FCOPY_VER_COUNT 1
170
++static const int fcopy_versions[] = {
171
++	WIN8_SRV_VERSION
172
++};
173
++
174
++#define FW_VER_COUNT 1
175
++static const int fw_versions[] = {
176
++	UTIL_FW_VERSION
177
++};
178
++
179
+ /*
180
+  * Global state maintained for transaction that is being processed.
181
+  * For a class of integration services, including the "file copy service",
182
+@@ -228,8 +238,6 @@ void hv_fcopy_onchannelcallback(void *context)
183
+ 	u64 requestid;
184
+ 	struct hv_fcopy_hdr *fcopy_msg;
185
+ 	struct icmsg_hdr *icmsghdr;
186
+-	struct icmsg_negotiate *negop = NULL;
187
+-	int util_fw_version;
188
+ 	int fcopy_srv_version;
189
+ 
190
+ 	if (fcopy_transaction.state > HVUTIL_READY)
191
+@@ -243,10 +251,10 @@ void hv_fcopy_onchannelcallback(void *context)
192
+ 	icmsghdr = (struct icmsg_hdr *)&recv_buffer[
193
+ 			sizeof(struct vmbuspipe_hdr)];
194
+ 	if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) {
195
+-		util_fw_version = UTIL_FW_VERSION;
196
+-		fcopy_srv_version = WIN8_SRV_VERSION;
197
+-		vmbus_prep_negotiate_resp(icmsghdr, negop, recv_buffer,
198
+-				util_fw_version, fcopy_srv_version);
199
++		vmbus_prep_negotiate_resp(icmsghdr, recv_buffer,
200
++				fw_versions, FW_VER_COUNT,
201
++				fcopy_versions, FCOPY_VER_COUNT,
202
++				NULL, &fcopy_srv_version);
203
+ 	} else {
204
+ 		fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[
205
+ 				sizeof(struct vmbuspipe_hdr) +
206
+diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
207
+index 3abfc5983c97..2cc670442f6c 100644
208
+--- a/drivers/hv/hv_kvp.c
209
+@@ -46,6 +46,19 @@
210
+ #define WIN8_SRV_MINOR   0
211
+ #define WIN8_SRV_VERSION     (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
212
+ 
213
++#define KVP_VER_COUNT 3
214
++static const int kvp_versions[] = {
215
++	WIN8_SRV_VERSION,
216
++	WIN7_SRV_VERSION,
217
++	WS2008_SRV_VERSION
218
++};
219
++
220
++#define FW_VER_COUNT 2
221
++static const int fw_versions[] = {
222
++	UTIL_FW_VERSION,
223
++	UTIL_WS2K8_FW_VERSION
224
++};
225
++
226
+ /*
227
+  * Global state maintained for transaction that is being processed. For a class
228
+  * of integration services, including the "KVP service", the specified protocol
229
+@@ -610,8 +623,6 @@ void hv_kvp_onchannelcallback(void *context)
230
+ 	struct hv_kvp_msg *kvp_msg;
231
+ 
232
+ 	struct icmsg_hdr *icmsghdrp;
233
+-	struct icmsg_negotiate *negop = NULL;
234
+-	int util_fw_version;
235
+ 	int kvp_srv_version;
236
+ 	static enum {NEGO_NOT_STARTED,
237
+ 		     NEGO_IN_PROGRESS,
238
+@@ -640,28 +651,10 @@ void hv_kvp_onchannelcallback(void *context)
239
+ 			sizeof(struct vmbuspipe_hdr)];
240
+ 
241
+ 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
242
+-			/*
243
+-			 * Based on the host, select appropriate
244
+-			 * framework and service versions we will
245
+-			 * negotiate.
246
+-			 */
247
+-			switch (vmbus_proto_version) {
248
+-			case (VERSION_WS2008):
249
+-				util_fw_version = UTIL_WS2K8_FW_VERSION;
250
+-				kvp_srv_version = WS2008_SRV_VERSION;
251
+-				break;
252
+-			case (VERSION_WIN7):
253
+-				util_fw_version = UTIL_FW_VERSION;
254
+-				kvp_srv_version = WIN7_SRV_VERSION;
255
+-				break;
256
+-			default:
257
+-				util_fw_version = UTIL_FW_VERSION;
258
+-				kvp_srv_version = WIN8_SRV_VERSION;
259
+-			}
260
+-			vmbus_prep_negotiate_resp(icmsghdrp, negop,
261
+-				 recv_buffer, util_fw_version,
262
+-				 kvp_srv_version);
263
+-
264
++			vmbus_prep_negotiate_resp(icmsghdrp,
265
++				 recv_buffer, fw_versions, FW_VER_COUNT,
266
++				 kvp_versions, KVP_VER_COUNT,
267
++				 NULL, &kvp_srv_version);
268
+ 		} else {
269
+ 			kvp_msg = (struct hv_kvp_msg *)&recv_buffer[
270
+ 				sizeof(struct vmbuspipe_hdr) +
271
+diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
272
+index 4e543dbb731a..d14f10b924a0 100644
273
+--- a/drivers/hv/hv_snapshot.c
274
+@@ -31,6 +31,16 @@
275
+ #define VSS_MINOR  0
276
+ #define VSS_VERSION    (VSS_MAJOR << 16 | VSS_MINOR)
277
+ 
278
++#define VSS_VER_COUNT 1
279
++static const int vss_versions[] = {
280
++	VSS_VERSION
281
++};
282
++
283
++#define FW_VER_COUNT 1
284
++static const int fw_versions[] = {
285
++	UTIL_FW_VERSION
286
++};
287
++
288
+ /*
289
+  * Timeout values are based on expecations from host
290
+  */
291
+@@ -297,7 +307,6 @@ void hv_vss_onchannelcallback(void *context)
292
+ 
293
+ 
294
+ 	struct icmsg_hdr *icmsghdrp;
295
+-	struct icmsg_negotiate *negop = NULL;
296
+ 
297
+ 	if (vss_transaction.state > HVUTIL_READY)
298
+ 		return;
299
+@@ -310,9 +319,10 @@ void hv_vss_onchannelcallback(void *context)
300
+ 			sizeof(struct vmbuspipe_hdr)];
301
+ 
302
+ 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
303
+-			vmbus_prep_negotiate_resp(icmsghdrp, negop,
304
+-				 recv_buffer, UTIL_FW_VERSION,
305
+-				 VSS_VERSION);
306
++			vmbus_prep_negotiate_resp(icmsghdrp,
307
++				 recv_buffer, fw_versions, FW_VER_COUNT,
308
++				 vss_versions, VSS_VER_COUNT,
309
++				 NULL, NULL);
310
+ 		} else {
311
+ 			vss_msg = (struct hv_vss_msg *)&recv_buffer[
312
+ 				sizeof(struct vmbuspipe_hdr) +
313
+diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
314
+index e7707747f56d..f3797c07be10 100644
315
+--- a/drivers/hv/hv_util.c
316
+@@ -57,7 +57,31 @@
317
+ static int sd_srv_version;
318
+ static int ts_srv_version;
319
+ static int hb_srv_version;
320
+-static int util_fw_version;
321
++
322
++#define SD_VER_COUNT 2
323
++static const int sd_versions[] = {
324
++	SD_VERSION,
325
++	SD_VERSION_1
326
++};
327
++
328
++#define TS_VER_COUNT 3
329
++static const int ts_versions[] = {
330
++	TS_VERSION,
331
++	TS_VERSION_3,
332
++	TS_VERSION_1
333
++};
334
++
335
++#define HB_VER_COUNT 2
336
++static const int hb_versions[] = {
337
++	HB_VERSION,
338
++	HB_VERSION_1
339
++};
340
++
341
++#define FW_VER_COUNT 2
342
++static const int fw_versions[] = {
343
++	UTIL_FW_VERSION,
344
++	UTIL_WS2K8_FW_VERSION
345
++};
346
+ 
347
+ static void shutdown_onchannelcallback(void *context);
348
+ static struct hv_util_service util_shutdown = {
349
+@@ -118,7 +142,6 @@ static void shutdown_onchannelcallback(void *context)
350
+ 	struct shutdown_msg_data *shutdown_msg;
351
+ 
352
+ 	struct icmsg_hdr *icmsghdrp;
353
+-	struct icmsg_negotiate *negop = NULL;
354
+ 
355
+ 	vmbus_recvpacket(channel, shut_txf_buf,
356
+ 			 PAGE_SIZE, &recvlen, &requestid);
357
+@@ -128,9 +151,14 @@ static void shutdown_onchannelcallback(void *context)
358
+ 			sizeof(struct vmbuspipe_hdr)];
359
+ 
360
+ 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
361
+-			vmbus_prep_negotiate_resp(icmsghdrp, negop,
362
+-					shut_txf_buf, util_fw_version,
363
+-					sd_srv_version);
364
++			if (vmbus_prep_negotiate_resp(icmsghdrp, shut_txf_buf,
365
++					fw_versions, FW_VER_COUNT,
366
++					sd_versions, SD_VER_COUNT,
367
++					NULL, &sd_srv_version)) {
368
++				pr_info("Shutdown IC version %d.%d\n",
369
++					sd_srv_version >> 16,
370
++					sd_srv_version & 0xFFFF);
371
++			}
372
+ 		} else {
373
+ 			shutdown_msg =
374
+ 				(struct shutdown_msg_data *)&shut_txf_buf[
375
+@@ -253,7 +281,6 @@ static void timesync_onchannelcallback(void *context)
376
+ 	struct ictimesync_data *timedatap;
377
+ 	struct ictimesync_ref_data *refdata;
378
+ 	u8 *time_txf_buf = util_timesynch.recv_buffer;
379
+-	struct icmsg_negotiate *negop = NULL;
380
+ 
381
+ 	vmbus_recvpacket(channel, time_txf_buf,
382
+ 			 PAGE_SIZE, &recvlen, &requestid);
383
+@@ -263,12 +290,14 @@ static void timesync_onchannelcallback(void *context)
384
+ 				sizeof(struct vmbuspipe_hdr)];
385
+ 
386
+ 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
387
+-			vmbus_prep_negotiate_resp(icmsghdrp, negop,
388
+-						time_txf_buf,
389
+-						util_fw_version,
390
+-						ts_srv_version);
391
+-			pr_info("Using TimeSync version %d.%d\n",
392
+-				ts_srv_version >> 16, ts_srv_version & 0xFFFF);
393
++			if (vmbus_prep_negotiate_resp(icmsghdrp, time_txf_buf,
394
++						fw_versions, FW_VER_COUNT,
395
++						ts_versions, TS_VER_COUNT,
396
++						NULL, &ts_srv_version)) {
397
++				pr_info("TimeSync version %d.%d\n",
398
++					ts_srv_version >> 16,
399
++					ts_srv_version & 0xFFFF);
400
++			}
401
+ 		} else {
402
+ 			if (ts_srv_version > TS_VERSION_3) {
403
+ 				refdata = (struct ictimesync_ref_data *)
404
+@@ -312,7 +341,6 @@ static void heartbeat_onchannelcallback(void *context)
405
+ 	struct icmsg_hdr *icmsghdrp;
406
+ 	struct heartbeat_msg_data *heartbeat_msg;
407
+ 	u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
408
+-	struct icmsg_negotiate *negop = NULL;
409
+ 
410
+ 	while (1) {
411
+ 
412
+@@ -326,9 +354,16 @@ static void heartbeat_onchannelcallback(void *context)
413
+ 				sizeof(struct vmbuspipe_hdr)];
414
+ 
415
+ 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
416
+-			vmbus_prep_negotiate_resp(icmsghdrp, negop,
417
+-				hbeat_txf_buf, util_fw_version,
418
+-				hb_srv_version);
419
++			if (vmbus_prep_negotiate_resp(icmsghdrp,
420
++					hbeat_txf_buf,
421
++					fw_versions, FW_VER_COUNT,
422
++					hb_versions, HB_VER_COUNT,
423
++					NULL, &hb_srv_version)) {
424
++
425
++				pr_info("Heartbeat version %d.%d\n",
426
++					hb_srv_version >> 16,
427
++					hb_srv_version & 0xFFFF);
428
++			}
429
+ 		} else {
430
+ 			heartbeat_msg =
431
+ 				(struct heartbeat_msg_data *)&hbeat_txf_buf[
432
+@@ -378,33 +413,6 @@ static int util_probe(struct hv_device *dev,
433
+ 
434
+ 	hv_set_drvdata(dev, srv);
435
+ 
436
+-	/*
437
+-	 * Based on the host; initialize the framework and
438
+-	 * service version numbers we will negotiate.
439
+-	 */
440
+-	switch (vmbus_proto_version) {
441
+-	case (VERSION_WS2008):
442
+-		util_fw_version = UTIL_WS2K8_FW_VERSION;
443
+-		sd_srv_version = SD_VERSION_1;
444
+-		ts_srv_version = TS_VERSION_1;
445
+-		hb_srv_version = HB_VERSION_1;
446
+-		break;
447
+-	case VERSION_WIN7:
448
+-	case VERSION_WIN8:
449
+-	case VERSION_WIN8_1:
450
+-		util_fw_version = UTIL_FW_VERSION;
451
+-		sd_srv_version = SD_VERSION;
452
+-		ts_srv_version = TS_VERSION_3;
453
+-		hb_srv_version = HB_VERSION;
454
+-		break;
455
+-	case VERSION_WIN10:
456
+-	default:
457
+-		util_fw_version = UTIL_FW_VERSION;
458
+-		sd_srv_version = SD_VERSION;
459
+-		ts_srv_version = TS_VERSION;
460
+-		hb_srv_version = HB_VERSION;
461
+-	}
462
+-
463
+ 	ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
464
+ 			srv->util_cb, dev->channel);
465
+ 	if (ret)
466
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
467
+index 489ad74c1e6e..956acfc93487 100644
468
+--- a/include/linux/hyperv.h
469
+@@ -1453,9 +1453,10 @@ struct hyperv_service_callback {
470
+ };
471
+ 
472
+ #define MAX_SRV_VER	0x7ffffff
473
+-extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *,
474
+-					struct icmsg_negotiate *, u8 *, int,
475
+-					int);
476
++extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
477
++				const int *fw_version, int fw_vercnt,
478
++				const int *srv_version, int srv_vercnt,
479
++				int *nego_fw_version, int *nego_srv_version);
480
+ 
481
+ void hv_event_tasklet_disable(struct vmbus_channel *channel);
482
+ void hv_event_tasklet_enable(struct vmbus_channel *channel);
483
+-- 
484
+2.13.0
485
+
0 486
new file mode 100644
... ...
@@ -0,0 +1,118 @@
0
+From c6ad5884453c734b343c608ea8a504743549d836 Mon Sep 17 00:00:00 2001
1
+From: Alex Ng <alexng@messages.microsoft.com>
2
+Date: Sat, 28 Jan 2017 12:37:18 -0700
3
+Subject: [PATCH 09/13] Drivers: hv: Log the negotiated IC versions.
4
+
5
+Log the negotiated IC versions.
6
+
7
+Signed-off-by: Alex Ng <alexng@messages.microsoft.com>
8
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
9
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
10
+Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
11
+(cherry picked from commit 1274a690f6b2bd2b37447c47e3062afa8aa43f93)
12
+---
13
+ drivers/hv/hv_fcopy.c    |  9 +++++++--
14
+ drivers/hv/hv_kvp.c      |  8 ++++++--
15
+ drivers/hv/hv_snapshot.c | 11 ++++++++---
16
+ drivers/hv/hv_util.c     |  4 ++--
17
+ 4 files changed, 23 insertions(+), 9 deletions(-)
18
+
19
+diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
20
+index 0a315e6aa589..9aee6014339d 100644
21
+--- a/drivers/hv/hv_fcopy.c
22
+@@ -251,10 +251,15 @@ void hv_fcopy_onchannelcallback(void *context)
23
+ 	icmsghdr = (struct icmsg_hdr *)&recv_buffer[
24
+ 			sizeof(struct vmbuspipe_hdr)];
25
+ 	if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) {
26
+-		vmbus_prep_negotiate_resp(icmsghdr, recv_buffer,
27
++		if (vmbus_prep_negotiate_resp(icmsghdr, recv_buffer,
28
+ 				fw_versions, FW_VER_COUNT,
29
+ 				fcopy_versions, FCOPY_VER_COUNT,
30
+-				NULL, &fcopy_srv_version);
31
++				NULL, &fcopy_srv_version)) {
32
++
33
++			pr_info("FCopy IC version %d.%d\n",
34
++				fcopy_srv_version >> 16,
35
++				fcopy_srv_version & 0xFFFF);
36
++		}
37
+ 	} else {
38
+ 		fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[
39
+ 				sizeof(struct vmbuspipe_hdr) +
40
+diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
41
+index 2cc670442f6c..de263712e247 100644
42
+--- a/drivers/hv/hv_kvp.c
43
+@@ -651,10 +651,14 @@ void hv_kvp_onchannelcallback(void *context)
44
+ 			sizeof(struct vmbuspipe_hdr)];
45
+ 
46
+ 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
47
+-			vmbus_prep_negotiate_resp(icmsghdrp,
48
++			if (vmbus_prep_negotiate_resp(icmsghdrp,
49
+ 				 recv_buffer, fw_versions, FW_VER_COUNT,
50
+ 				 kvp_versions, KVP_VER_COUNT,
51
+-				 NULL, &kvp_srv_version);
52
++				 NULL, &kvp_srv_version)) {
53
++				pr_info("KVP IC version %d.%d\n",
54
++					kvp_srv_version >> 16,
55
++					kvp_srv_version & 0xFFFF);
56
++			}
57
+ 		} else {
58
+ 			kvp_msg = (struct hv_kvp_msg *)&recv_buffer[
59
+ 				sizeof(struct vmbuspipe_hdr) +
60
+diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
61
+index d14f10b924a0..bcc03f0748d6 100644
62
+--- a/drivers/hv/hv_snapshot.c
63
+@@ -304,7 +304,7 @@ void hv_vss_onchannelcallback(void *context)
64
+ 	u32 recvlen;
65
+ 	u64 requestid;
66
+ 	struct hv_vss_msg *vss_msg;
67
+-
68
++	int vss_srv_version;
69
+ 
70
+ 	struct icmsg_hdr *icmsghdrp;
71
+ 
72
+@@ -319,10 +319,15 @@ void hv_vss_onchannelcallback(void *context)
73
+ 			sizeof(struct vmbuspipe_hdr)];
74
+ 
75
+ 		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
76
+-			vmbus_prep_negotiate_resp(icmsghdrp,
77
++			if (vmbus_prep_negotiate_resp(icmsghdrp,
78
+ 				 recv_buffer, fw_versions, FW_VER_COUNT,
79
+ 				 vss_versions, VSS_VER_COUNT,
80
+-				 NULL, NULL);
81
++				 NULL, &vss_srv_version)) {
82
++
83
++				pr_info("VSS IC version %d.%d\n",
84
++					vss_srv_version >> 16,
85
++					vss_srv_version & 0xFFFF);
86
++			}
87
+ 		} else {
88
+ 			vss_msg = (struct hv_vss_msg *)&recv_buffer[
89
+ 				sizeof(struct vmbuspipe_hdr) +
90
+diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
91
+index f3797c07be10..89440c2eb346 100644
92
+--- a/drivers/hv/hv_util.c
93
+@@ -294,7 +294,7 @@ static void timesync_onchannelcallback(void *context)
94
+ 						fw_versions, FW_VER_COUNT,
95
+ 						ts_versions, TS_VER_COUNT,
96
+ 						NULL, &ts_srv_version)) {
97
+-				pr_info("TimeSync version %d.%d\n",
98
++				pr_info("TimeSync IC version %d.%d\n",
99
+ 					ts_srv_version >> 16,
100
+ 					ts_srv_version & 0xFFFF);
101
+ 			}
102
+@@ -360,7 +360,7 @@ static void heartbeat_onchannelcallback(void *context)
103
+ 					hb_versions, HB_VER_COUNT,
104
+ 					NULL, &hb_srv_version)) {
105
+ 
106
+-				pr_info("Heartbeat version %d.%d\n",
107
++				pr_info("Heartbeat IC version %d.%d\n",
108
+ 					hb_srv_version >> 16,
109
+ 					hb_srv_version & 0xFFFF);
110
+ 			}
111
+-- 
112
+2.13.0
113
+
0 114
new file mode 100644
... ...
@@ -0,0 +1,64 @@
0
+From eb533e334d182ea553c138576788771e55f0f484 Mon Sep 17 00:00:00 2001
1
+From: Dexuan Cui <decui@microsoft.com>
2
+Date: Sun, 26 Mar 2017 16:42:20 +0800
3
+Subject: [PATCH 10/13] vmbus: fix missed ring events on boot
4
+
5
+During initialization, the channel initialization code schedules the
6
+tasklet to scan the VMBUS receive event page (i.e. simulates an
7
+interrupt). The problem was that it invokes the tasklet on a different
8
+CPU from where it normally runs and therefore if an event is present,
9
+it will clear the bit but not find the associated channel.
10
+
11
+This can lead to missed events, typically stuck tasks, during bootup
12
+when sub channels are being initialized. Typically seen as stuck
13
+boot with 8 or more CPUs.
14
+
15
+This patch is not necessary for upstream (4.11 and later) since
16
+commit 631e63a9f346 ("vmbus: change to per channel tasklet").
17
+This changed vmbus code to get rid of common tasklet which
18
+caused the problem.
19
+
20
+Cc: stable@vger.kernel.org
21
+Fixes: 638fea33aee8 ("Drivers: hv: vmbus: fix the race when querying & updating the percpu list")
22
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
23
+Origin: git@github.com:dcui/linux.git
24
+(cherry picked from commit 5cf3a72a111cecc7da759542c56560ce509159d7)
25
+---
26
+ drivers/hv/channel_mgmt.c | 19 +++++++++++--
27
+ 1 file changed, 17 insertions(+), 2 deletions(-)
28
+
29
+diff -rup linux-4.9.38/drivers/hv/channel_mgmt.c linux-4.9.38-new/drivers/hv/channel_mgmt.c
30
+--- linux-4.9.38/drivers/hv/channel_mgmt.c	2017-07-28 14:50:56.413190385 -0700
31
+@@ -382,14 +382,29 @@ void hv_event_tasklet_disable(struct vmb
32
+ 	tasklet_disable(tasklet);
33
+ }
34
+ 
35
++void tasklet_schedule_wrapper(void *tasklet_notype)
36
++{
37
++        struct tasklet_struct *tasklet = (struct tasklet_struct *)tasklet_notype;
38
++        tasklet_schedule(tasklet);
39
++}
40
++
41
+ void hv_event_tasklet_enable(struct vmbus_channel *channel)
42
+ {
43
+ 	struct tasklet_struct *tasklet;
44
+ 	tasklet = hv_context.event_dpc[channel->target_cpu];
45
+ 	tasklet_enable(tasklet);
46
+ 
47
+-	/* In case there is any pending event */
48
+-	tasklet_schedule(tasklet);
49
++        /*
50
++         * In case there is any pending event schedule a rescan
51
++         * but must be on the correct CPU for the channel.
52
++         */
53
++        if (channel->target_cpu == get_cpu())
54
++                tasklet_schedule(tasklet);
55
++        else
56
++                smp_call_function_single(channel->target_cpu,
57
++                                        (smp_call_func_t)tasklet_schedule_wrapper,
58
++                                        (void *)tasklet, false);
59
++       put_cpu();
60
+ }
61
+ 
62
+ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
0 63
new file mode 100644
... ...
@@ -0,0 +1,60 @@
0
+From 830ea2549499b58f3b49e304d84f79c1f377883e Mon Sep 17 00:00:00 2001
1
+From: Dexuan Cui <decui@microsoft.com>
2
+Date: Wed, 29 Mar 2017 18:37:10 +0800
3
+Subject: [PATCH 11/13] vmbus: remove "goto error_clean_msglist" in
4
+ vmbus_open()
5
+
6
+This is just a cleanup patch to simplify the code a little.
7
+No semantic change.
8
+
9
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
10
+Origin: git@github.com:dcui/linux.git
11
+(cherry picked from commit 2c89f21cbdfd39299482cd6068094097a45f13b3)
12
+---
13
+ drivers/hv/channel.c | 18 +++++++-----------
14
+ 1 file changed, 7 insertions(+), 11 deletions(-)
15
+
16
+diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
17
+index 1606e7f08f4b..1caed01954f6 100644
18
+--- a/drivers/hv/channel.c
19
+@@ -184,17 +184,18 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
20
+ 	ret = vmbus_post_msg(open_msg,
21
+ 			     sizeof(struct vmbus_channel_open_channel), true);
22
+ 
23
+-	if (ret != 0) {
24
+-		err = ret;
25
+-		goto error_clean_msglist;
26
+-	}
27
+-
28
+-	wait_for_completion(&open_info->waitevent);
29
++	if (ret == 0)
30
++		wait_for_completion(&open_info->waitevent);
31
+ 
32
+ 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
33
+ 	list_del(&open_info->msglistentry);
34
+ 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
35
+ 
36
++	if (ret != 0) {
37
++		err = ret;
38
++		goto error_free_gpadl;
39
++	}
40
++
41
+ 	if (newchannel->rescind) {
42
+ 		err = -ENODEV;
43
+ 		goto error_free_gpadl;
44
+@@ -209,11 +210,6 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
45
+ 	kfree(open_info);
46
+ 	return 0;
47
+ 
48
+-error_clean_msglist:
49
+-	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
50
+-	list_del(&open_info->msglistentry);
51
+-	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
52
+-
53
+ error_free_gpadl:
54
+ 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
55
+ 	kfree(open_info);
56
+-- 
57
+2.13.0
58
+
0 59
new file mode 100644
... ...
@@ -0,0 +1,177 @@
0
+From 958ee135863de8865c62920bdff7bbea4199bf44 Mon Sep 17 00:00:00 2001
1
+From: Dexuan Cui <decui@microsoft.com>
2
+Date: Fri, 24 Mar 2017 20:53:18 +0800
3
+Subject: [PATCH 12/13] vmbus: dynamically enqueue/dequeue the channel on
4
+ vmbus_open/close
5
+
6
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
7
+Origin: git@github.com:dcui/linux.git
8
+(cherry picked from commit bee4910daa4aed57ce60d2e2350e3cc120c383ca)
9
+---
10
+ drivers/hv/channel.c      | 16 ++++++++++---
11
+ drivers/hv/channel_mgmt.c | 58 ++++++++++++++++++++---------------------------
12
+ include/linux/hyperv.h    |  3 +++
13
+ 3 files changed, 40 insertions(+), 37 deletions(-)
14
+
15
+diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
16
+index 1caed01954f6..5bbcc964dbf7 100644
17
+--- a/drivers/hv/channel.c
18
+@@ -181,6 +181,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
19
+ 		      &vmbus_connection.chn_msg_list);
20
+ 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
21
+ 
22
++	hv_event_tasklet_disable(newchannel);
23
++	hv_percpu_channel_enq(newchannel);
24
++	hv_event_tasklet_enable(newchannel);
25
++
26
+ 	ret = vmbus_post_msg(open_msg,
27
+ 			     sizeof(struct vmbus_channel_open_channel), true);
28
+ 
29
+@@ -193,23 +197,27 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
30
+ 
31
+ 	if (ret != 0) {
32
+ 		err = ret;
33
+-		goto error_free_gpadl;
34
++		goto error_deq_channel;
35
+ 	}
36
+ 
37
+ 	if (newchannel->rescind) {
38
+ 		err = -ENODEV;
39
+-		goto error_free_gpadl;
40
++		goto error_deq_channel;
41
+ 	}
42
+ 
43
+ 	if (open_info->response.open_result.status) {
44
+ 		err = -EAGAIN;
45
+-		goto error_free_gpadl;
46
++		goto error_deq_channel;
47
+ 	}
48
+ 
49
+ 	newchannel->state = CHANNEL_OPENED_STATE;
50
+ 	kfree(open_info);
51
+ 	return 0;
52
+ 
53
++error_deq_channel:
54
++	hv_event_tasklet_disable(newchannel);
55
++	hv_percpu_channel_deq(newchannel);
56
++	hv_event_tasklet_enable(newchannel);
57
+ error_free_gpadl:
58
+ 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
59
+ 	kfree(open_info);
60
+@@ -555,6 +563,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
61
+ 		goto out;
62
+ 	}
63
+ 
64
++	hv_percpu_channel_deq(channel);
65
++
66
+ 	channel->state = CHANNEL_OPEN_STATE;
67
+ 	channel->sc_creation_callback = NULL;
68
+ 	/* Stop callback and cancel the timer asap */
69
+diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
70
+index 2fe024e86209..b2bdcfb49144 100644
71
+--- a/drivers/hv/channel_mgmt.c
72
+@@ -375,6 +375,30 @@ static void vmbus_release_relid(u32 relid)
73
+ 		       true);
74
+ }
75
+ 
76
++void hv_percpu_channel_enq(struct vmbus_channel *channel)
77
++{
78
++	if (channel->target_cpu != get_cpu())
79
++		smp_call_function_single(channel->target_cpu,
80
++					 percpu_channel_enq,
81
++					 channel, true);
82
++	else
83
++		percpu_channel_enq(channel);
84
++	put_cpu();
85
++
86
++}
87
++
88
++void hv_percpu_channel_deq(struct vmbus_channel *channel)
89
++{
90
++	if (channel->target_cpu != get_cpu())
91
++		smp_call_function_single(channel->target_cpu,
92
++					 percpu_channel_deq,
93
++					 channel, true);
94
++	else
95
++		percpu_channel_deq(channel);
96
++	put_cpu();
97
++
98
++}
99
++
100
+ void hv_event_tasklet_disable(struct vmbus_channel *channel)
101
+ {
102
+ 	struct tasklet_struct *tasklet;
103
+@@ -409,17 +433,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
104
+ 	BUG_ON(!channel->rescind);
105
+ 	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
106
+ 
107
+-	hv_event_tasklet_disable(channel);
108
+-	if (channel->target_cpu != get_cpu()) {
109
+-		put_cpu();
110
+-		smp_call_function_single(channel->target_cpu,
111
+-					 percpu_channel_deq, channel, true);
112
+-	} else {
113
+-		percpu_channel_deq(channel);
114
+-		put_cpu();
115
+-	}
116
+-	hv_event_tasklet_enable(channel);
117
+-
118
+ 	if (channel->primary_channel == NULL) {
119
+ 		list_del(&channel->listentry);
120
+ 
121
+@@ -512,18 +525,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
122
+ 
123
+ 	init_vp_index(newchannel, dev_type);
124
+ 
125
+-	hv_event_tasklet_disable(newchannel);
126
+-	if (newchannel->target_cpu != get_cpu()) {
127
+-		put_cpu();
128
+-		smp_call_function_single(newchannel->target_cpu,
129
+-					 percpu_channel_enq,
130
+-					 newchannel, true);
131
+-	} else {
132
+-		percpu_channel_enq(newchannel);
133
+-		put_cpu();
134
+-	}
135
+-	hv_event_tasklet_enable(newchannel);
136
+-
137
+ 	/*
138
+ 	 * This state is used to indicate a successful open
139
+ 	 * so that when we do close the channel normally, we
140
+@@ -572,17 +573,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
141
+ 	list_del(&newchannel->listentry);
142
+ 	mutex_unlock(&vmbus_connection.channel_mutex);
143
+ 
144
+-	hv_event_tasklet_disable(newchannel);
145
+-	if (newchannel->target_cpu != get_cpu()) {
146
+-		put_cpu();
147
+-		smp_call_function_single(newchannel->target_cpu,
148
+-					 percpu_channel_deq, newchannel, true);
149
+-	} else {
150
+-		percpu_channel_deq(newchannel);
151
+-		put_cpu();
152
+-	}
153
+-	hv_event_tasklet_enable(newchannel);
154
+-
155
+ 	vmbus_release_relid(newchannel->offermsg.child_relid);
156
+ 
157
+ err_free_chan:
158
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
159
+index 956acfc93487..9ee292b28e41 100644
160
+--- a/include/linux/hyperv.h
161
+@@ -1461,6 +1461,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
162
+ void hv_event_tasklet_disable(struct vmbus_channel *channel);
163
+ void hv_event_tasklet_enable(struct vmbus_channel *channel);
164
+ 
165
++void hv_percpu_channel_enq(struct vmbus_channel *channel);
166
++void hv_percpu_channel_deq(struct vmbus_channel *channel);
167
++
168
+ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
169
+ 
170
+ void vmbus_setevent(struct vmbus_channel *channel);
171
+-- 
172
+2.13.0
173
+
0 174
new file mode 100644
... ...
@@ -0,0 +1,47 @@
0
+From 34b5cdf37eacf3a82a9f47d8728c13acc11baed7 Mon Sep 17 00:00:00 2001
1
+From: Dexuan Cui <decui@microsoft.com>
2
+Date: Thu, 6 Jul 2017 21:37:11 +0000
3
+Subject: [PATCH 13/13] vmbus: fix the missed signaling in hv_signal_on_read()
4
+
5
+There is an off-by-one bug here, which can cause host-to-guest write to stall.
6
+
7
+When cur_write_sz == pending_sz, we shouldn't signal the host because it's
8
+meaningless: the ring mustn't be 100% full.
9
+
10
+But when cached_write_sz == pending_sz, we must signal the host.
11
+
12
+Fixes: 433e19cf33d3 ("Drivers: hv: vmbus: finally fix
13
+hv_need_to_signal_on_read()")
14
+
15
+Signed-off-by: John Starks <John.Starks@microsoft.com>
16
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
17
+Cc: Haiyang Zhang <haiyangz@microsoft.com>
18
+Cc: Stephen Hemminger <sthemmin@microsoft.com>
19
+Cc: "K. Y. Srinivasan" <kys@microsoft.com>
20
+Cc: <stable@vger.kernel.org>
21
+Origin: https://patchwork.kernel.org/patch/9829039/
22
+---
23
+ include/linux/hyperv.h | 4 ++--
24
+ 1 file changed, 2 insertions(+), 2 deletions(-)
25
+
26
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
27
+index 9ee292b28e41..a87757cf277b 100644
28
+--- a/include/linux/hyperv.h
29
+@@ -1525,11 +1525,11 @@ static inline  void hv_signal_on_read(struct vmbus_channel *channel)
30
+ 
31
+ 	cur_write_sz = hv_get_bytes_to_write(rbi);
32
+ 
33
+-	if (cur_write_sz < pending_sz)
34
++	if (cur_write_sz <= pending_sz)
35
+ 		return;
36
+ 
37
+ 	cached_write_sz = hv_get_cached_bytes_to_write(rbi);
38
+-	if (cached_write_sz < pending_sz)
39
++	if (cached_write_sz <= pending_sz)
40
+ 		vmbus_setevent(channel);
41
+ 
42
+ 	return;
43
+-- 
44
+2.13.0
45
+
0 46
new file mode 100644
... ...
@@ -0,0 +1,1791 @@
0
+From dd53a1fc57f6a549aeb50dae4b4567690a16c120 Mon Sep 17 00:00:00 2001
1
+From: Dexuan Cui <decui@microsoft.com>
2
+Date: Sat, 23 Jul 2016 01:35:51 +0000
3
+Subject: [PATCH 03/13] hv_sock: introduce Hyper-V Sockets
4
+
5
+Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
6
+mechanism between the host and the guest. It's somewhat like TCP over
7
+VMBus, but the transportation layer (VMBus) is much simpler than IP.
8
+
9
+With Hyper-V Sockets, applications between the host and the guest can talk
10
+to each other directly by the traditional BSD-style socket APIs.
11
+
12
+Hyper-V Sockets is only available on new Windows hosts, like Windows Server
13
+2016. More info is in this article "Make your own integration services":
14
+https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service
15
+
16
+The patch implements the necessary support in the guest side by introducing
17
+a new socket address family AF_HYPERV.
18
+
19
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
20
+Cc: "K. Y. Srinivasan" <kys@microsoft.com>
21
+Cc: Haiyang Zhang <haiyangz@microsoft.com>
22
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
23
+Cc: Cathy Avery <cavery@redhat.com>
24
+Cc: Olaf Hering <olaf@aepfle.de>
25
+Origin: https://patchwork.kernel.org/patch/9244467/
26
+---
27
+ MAINTAINERS                 |    2 +
28
+ include/linux/hyperv.h      |   13 +
29
+ include/linux/socket.h      |    4 +-
30
+ include/net/af_hvsock.h     |   78 +++
31
+ include/uapi/linux/hyperv.h |   23 +
32
+ net/Kconfig                 |    1 +
33
+ net/Makefile                |    1 +
34
+ net/hv_sock/Kconfig         |   10 +
35
+ net/hv_sock/Makefile        |    3 +
36
+ net/hv_sock/af_hvsock.c     | 1507 +++++++++++++++++++++++++++++++++++++++++++
37
+ 10 files changed, 1641 insertions(+), 1 deletion(-)
38
+ create mode 100644 include/net/af_hvsock.h
39
+ create mode 100644 net/hv_sock/Kconfig
40
+ create mode 100644 net/hv_sock/Makefile
41
+ create mode 100644 net/hv_sock/af_hvsock.c
42
+
43
+diff --git a/MAINTAINERS b/MAINTAINERS
44
+index 63cefa62324c..e64920219d88 100644
45
+--- a/MAINTAINERS
46
+@@ -5853,7 +5853,9 @@ F:	drivers/pci/host/pci-hyperv.c
47
+ F:	drivers/net/hyperv/
48
+ F:	drivers/scsi/storvsc_drv.c
49
+ F:	drivers/video/fbdev/hyperv_fb.c
50
++F:	net/hv_sock/
51
+ F:	include/linux/hyperv.h
52
++F:	include/net/af_hvsock.h
53
+ F:	tools/hv/
54
+ F:	Documentation/ABI/stable/sysfs-bus-vmbus
55
+ 
56
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
57
+index d596a076da11..489ad74c1e6e 100644
58
+--- a/include/linux/hyperv.h
59
+@@ -1613,5 +1613,18 @@ static inline void commit_rd_index(struct vmbus_channel *channel)
60
+ 	hv_signal_on_read(channel);
61
+ }
62
+ 
63
++struct vmpipe_proto_header {
64
++	u32 pkt_type;
65
++	u32 data_size;
66
++};
67
++
68
++#define HVSOCK_HEADER_LEN	(sizeof(struct vmpacket_descriptor) + \
69
++				 sizeof(struct vmpipe_proto_header))
70
++
71
++/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */
72
++#define PREV_INDICES_LEN	(sizeof(u64))
73
+ 
74
++#define HVSOCK_PKT_LEN(payload_len)	(HVSOCK_HEADER_LEN + \
75
++					ALIGN((payload_len), 8) + \
76
++					PREV_INDICES_LEN)
77
+ #endif /* _HYPERV_H */
78
+diff --git a/include/linux/socket.h b/include/linux/socket.h
79
+index b5cc5a6d7011..0b68b587d6ee 100644
80
+--- a/include/linux/socket.h
81
+@@ -202,8 +202,9 @@ struct ucred {
82
+ #define AF_VSOCK	40	/* vSockets			*/
83
+ #define AF_KCM		41	/* Kernel Connection Multiplexor*/
84
+ #define AF_QIPCRTR	42	/* Qualcomm IPC Router          */
85
++#define AF_HYPERV	43	/* Hyper-V Sockets              */
86
+ 
87
+-#define AF_MAX		43	/* For now.. */
88
++#define AF_MAX		44	/* For now.. */
89
+ 
90
+ /* Protocol families, same as address families. */
91
+ #define PF_UNSPEC	AF_UNSPEC
92
+@@ -251,6 +252,7 @@ struct ucred {
93
+ #define PF_VSOCK	AF_VSOCK
94
+ #define PF_KCM		AF_KCM
95
+ #define PF_QIPCRTR	AF_QIPCRTR
96
++#define PF_HYPERV	AF_HYPERV
97
+ #define PF_MAX		AF_MAX
98
+ 
99
+ /* Maximum queue length specifiable by listen.  */
100
+diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h
101
+new file mode 100644
102
+index 000000000000..e7a8a3ae08e8
103
+--- /dev/null
104
+@@ -0,0 +1,78 @@
105
++#ifndef __AF_HVSOCK_H__
106
++#define __AF_HVSOCK_H__
107
++
108
++#include <linux/kernel.h>
109
++#include <linux/hyperv.h>
110
++#include <net/sock.h>
111
++
112
++/* The host side's design of the feature requires exactly five 4KB pages for
113
++ * recv/send rings respectively -- this is suboptimal considering memory
114
++ * consumption; unfortunately we have to live with it until the
115
++ * host comes up with a better design in the future.
116
++ */
117
++#define PAGE_SIZE_4K		4096
118
++#define RINGBUFFER_HVSOCK_RCV_SIZE (PAGE_SIZE_4K * 5)
119
++#define RINGBUFFER_HVSOCK_SND_SIZE (PAGE_SIZE_4K * 5)
120
++
121
++/* The MTU is 16KB per the host side's design.
122
++ * In future, the buffer can be elimiated when we switch to use the coming
123
++ * new VMBus ringbuffer "in-place consumption" APIs, by which we can
124
++ * directly copy data from VMBus ringbuffer into the userspace buffer.
125
++ */
126
++#define HVSOCK_MTU_SIZE		(1024 * 16)
127
++struct hvsock_recv_buf {
128
++	unsigned int data_len;
129
++	unsigned int data_offset;
130
++
131
++	struct vmpipe_proto_header hdr;
132
++	u8 buf[HVSOCK_MTU_SIZE];
133
++};
134
++
135
++/* In the VM, actually we can send up to HVSOCK_MTU_SIZE bytes of payload,
136
++ * but for now let's use a smaller size to minimize the dynamically-allocated
137
++ * buffer. Note: the buffer can be eliminated in the future when we add new VMBus
138
++ * ringbuffer APIs that allow us to directly copy data from userspace buf to
139
++ * VMBus ringbuffer.
140
++ */
141
++#define HVSOCK_MAX_SND_SIZE_BY_VM (1024 * 4)
142
++struct hvsock_send_buf {
143
++	struct vmpipe_proto_header hdr;
144
++	u8 buf[HVSOCK_MAX_SND_SIZE_BY_VM];
145
++};
146
++
147
++struct hvsock_sock {
148
++	/* sk must be the first member. */
149
++	struct sock sk;
150
++
151
++	struct sockaddr_hv local_addr;
152
++	struct sockaddr_hv remote_addr;
153
++
154
++	/* protected by the global hvsock_mutex */
155
++	struct list_head bound_list;
156
++	struct list_head connected_list;
157
++
158
++	struct list_head accept_queue;
159
++	/* used by enqueue and dequeue */
160
++	struct mutex accept_queue_mutex;
161
++
162
++	struct delayed_work dwork;
163
++
164
++	u32 peer_shutdown;
165
++
166
++	struct vmbus_channel *channel;
167
++
168
++	struct hvsock_send_buf *send;
169
++	struct hvsock_recv_buf *recv;
170
++};
171
++
172
++static inline struct hvsock_sock *sk_to_hvsock(struct sock *sk)
173
++{
174
++	return (struct hvsock_sock *)sk;
175
++}
176
++
177
++static inline struct sock *hvsock_to_sk(struct hvsock_sock *hvsk)
178
++{
179
++	return (struct sock *)hvsk;
180
++}
181
++
182
++#endif /* __AF_HVSOCK_H__ */
183
+diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
184
+index e347b24ef9fb..eb3e44b69a5d 100644
185
+--- a/include/uapi/linux/hyperv.h
186
+@@ -26,6 +26,7 @@
187
+ #define _UAPI_HYPERV_H
188
+ 
189
+ #include <linux/uuid.h>
190
++#include <linux/socket.h>
191
+ 
192
+ /*
193
+  * Framework version for util services.
194
+@@ -396,4 +397,26 @@ struct hv_kvp_ip_msg {
195
+ 	struct hv_kvp_ipaddr_value      kvp_ip_val;
196
+ } __attribute__((packed));
197
+ 
198
++/* This is the address format of Hyper-V Sockets.
199
++ * Note: here we just borrow the kernel's built-in type uuid_le. When
200
++ * an application calls bind() or connect(), the 2 members of struct
201
++ * sockaddr_hv must be in GUID format.
202
++ * The GUID format differs from the UUID format only in the byte order of
203
++ * the first 3 fields. Refer to:
204
++ * https://en.wikipedia.org/wiki/Globally_unique_identifier
205
++ */
206
++struct sockaddr_hv {
207
++	__kernel_sa_family_t	shv_family;  /* Address family		*/
208
++	u16		reserved;	     /* Must be Zero		*/
209
++	uuid_le		shv_vm_guid;	     /* VM ID			*/
210
++	uuid_le		shv_service_guid;    /* Service ID		*/
211
++};
212
++
213
++#define SHV_VMID_GUEST	NULL_UUID_LE
214
++#define SHV_VMID_HOST	NULL_UUID_LE
215
++
216
++#define SHV_SERVICE_ID_ANY	NULL_UUID_LE
217
++
218
++#define SHV_PROTO_RAW		1
219
++
220
+ #endif /* _UAPI_HYPERV_H */
221
+diff --git a/net/Kconfig b/net/Kconfig
222
+index 7b6cd340b72b..a9be6907a620 100644
223
+--- a/net/Kconfig
224
+@@ -231,6 +231,7 @@ source "net/dns_resolver/Kconfig"
225
+ source "net/batman-adv/Kconfig"
226
+ source "net/openvswitch/Kconfig"
227
+ source "net/vmw_vsock/Kconfig"
228
++source "net/hv_sock/Kconfig"
229
+ source "net/netlink/Kconfig"
230
+ source "net/mpls/Kconfig"
231
+ source "net/hsr/Kconfig"
232
+diff --git a/net/Makefile b/net/Makefile
233
+index 4cafaa2b4667..2b357eb81865 100644
234
+--- a/net/Makefile
235
+@@ -71,6 +71,7 @@ obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
236
+ obj-$(CONFIG_NFC)		+= nfc/
237
+ obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
238
+ obj-$(CONFIG_VSOCKETS)	+= vmw_vsock/
239
++obj-$(CONFIG_HYPERV_SOCK)	+= hv_sock/
240
+ obj-$(CONFIG_MPLS)		+= mpls/
241
+ obj-$(CONFIG_HSR)		+= hsr/
242
+ ifneq ($(CONFIG_NET_SWITCHDEV),)
243
+diff --git a/net/hv_sock/Kconfig b/net/hv_sock/Kconfig
244
+new file mode 100644
245
+index 000000000000..ff84875564d1
246
+--- /dev/null
247
+@@ -0,0 +1,10 @@
248
++config HYPERV_SOCK
249
++	tristate "Hyper-V Sockets"
250
++	depends on HYPERV
251
++	default m if HYPERV
252
++	help
253
++	  Hyper-V Sockets is a socket interface for high speed
254
++	  communication between Linux guest and Hyper-V host over VMBus.
255
++
256
++	  To compile this driver as a module, choose M here: the module
257
++	  will be called hv_sock.
258
+diff --git a/net/hv_sock/Makefile b/net/hv_sock/Makefile
259
+new file mode 100644
260
+index 000000000000..716c01230129
261
+--- /dev/null
262
+@@ -0,0 +1,3 @@
263
++obj-$(CONFIG_HYPERV_SOCK) += hv_sock.o
264
++
265
++hv_sock-y += af_hvsock.o
266
+diff --git a/net/hv_sock/af_hvsock.c b/net/hv_sock/af_hvsock.c
267
+new file mode 100644
268
+index 000000000000..331d3759f5cb
269
+--- /dev/null
270
+@@ -0,0 +1,1507 @@
271
++/*
272
++ * Hyper-V Sockets -- a socket-based communication channel between the
273
++ * Hyper-V host and the virtual machines running on it.
274
++ *
275
++ * Copyright (c) 2016 Microsoft Corporation.
276
++ *
277
++ * All rights reserved.
278
++ *
279
++ * Redistribution and use in source and binary forms, with or without
280
++ * modification, are permitted provided that the following conditions
281
++ * are met:
282
++ *
283
++ * 1. Redistributions of source code must retain the above copyright
284
++ *    notice, this list of conditions and the following disclaimer.
285
++ * 2. Redistributions in binary form must reproduce the above copyright
286
++ *    notice, this list of conditions and the following disclaimer in the
287
++ *    documentation and/or other materials provided with the distribution.
288
++ * 3. The name of the author may not be used to endorse or promote
289
++ *    products derived from this software without specific prior written
290
++ *    permission.
291
++ *
292
++ * Alternatively, this software may be distributed under the terms of the
293
++ * GNU General Public License ("GPL") version 2 as published by the Free
294
++ * Software Foundation.
295
++ *
296
++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
297
++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
298
++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
299
++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
300
++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
301
++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
302
++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
303
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
304
++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
305
++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
306
++ * POSSIBILITY OF SUCH DAMAGE.
307
++ */
308
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
309
++
310
++#include <linux/init.h>
311
++#include <linux/module.h>
312
++#include <linux/vmalloc.h>
313
++#include <net/af_hvsock.h>
314
++
315
++static struct proto hvsock_proto = {
316
++	.name = "HV_SOCK",
317
++	.owner = THIS_MODULE,
318
++	.obj_size = sizeof(struct hvsock_sock),
319
++};
320
++
321
++#define SS_LISTEN 255
322
++
323
++#define HVSOCK_CONNECT_TIMEOUT (30 * HZ)
324
++
325
++/* This is an artificial limit */
326
++#define HVSOCK_MAX_BACKLOG	128
327
++
328
++static LIST_HEAD(hvsock_bound_list);
329
++static LIST_HEAD(hvsock_connected_list);
330
++static DEFINE_MUTEX(hvsock_mutex);
331
++
332
++static struct sock *hvsock_find_bound_socket(const struct sockaddr_hv *addr)
333
++{
334
++	struct hvsock_sock *hvsk;
335
++
336
++	list_for_each_entry(hvsk, &hvsock_bound_list, bound_list) {
337
++		if (!uuid_le_cmp(addr->shv_service_guid,
338
++				 hvsk->local_addr.shv_service_guid))
339
++			return hvsock_to_sk(hvsk);
340
++	}
341
++	return NULL;
342
++}
343
++
344
++static struct sock *hvsock_find_connected_socket_by_channel(
345
++	const struct vmbus_channel *channel)
346
++{
347
++	struct hvsock_sock *hvsk;
348
++
349
++	list_for_each_entry(hvsk, &hvsock_connected_list, connected_list) {
350
++		if (hvsk->channel == channel)
351
++			return hvsock_to_sk(hvsk);
352
++	}
353
++	return NULL;
354
++}
355
++
356
++static void hvsock_enqueue_accept(struct sock *listener,
357
++				  struct sock *connected)
358
++{
359
++	struct hvsock_sock *hvconnected;
360
++	struct hvsock_sock *hvlistener;
361
++
362
++	hvlistener = sk_to_hvsock(listener);
363
++	hvconnected = sk_to_hvsock(connected);
364
++
365
++	sock_hold(connected);
366
++	sock_hold(listener);
367
++
368
++	mutex_lock(&hvlistener->accept_queue_mutex);
369
++	list_add_tail(&hvconnected->accept_queue, &hvlistener->accept_queue);
370
++	listener->sk_ack_backlog++;
371
++	mutex_unlock(&hvlistener->accept_queue_mutex);
372
++}
373
++
374
++static struct sock *hvsock_dequeue_accept(struct sock *listener)
375
++{
376
++	struct hvsock_sock *hvconnected;
377
++	struct hvsock_sock *hvlistener;
378
++
379
++	hvlistener = sk_to_hvsock(listener);
380
++
381
++	mutex_lock(&hvlistener->accept_queue_mutex);
382
++
383
++	if (list_empty(&hvlistener->accept_queue)) {
384
++		mutex_unlock(&hvlistener->accept_queue_mutex);
385
++		return NULL;
386
++	}
387
++
388
++	hvconnected = list_entry(hvlistener->accept_queue.next,
389
++				 struct hvsock_sock, accept_queue);
390
++
391
++	list_del_init(&hvconnected->accept_queue);
392
++	listener->sk_ack_backlog--;
393
++
394
++	mutex_unlock(&hvlistener->accept_queue_mutex);
395
++
396
++	sock_put(listener);
397
++	/* The caller will need a reference on the connected socket so we let
398
++	 * it call sock_put().
399
++	 */
400
++
401
++	return hvsock_to_sk(hvconnected);
402
++}
403
++
404
++static bool hvsock_is_accept_queue_empty(struct sock *sk)
405
++{
406
++	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
407
++	int ret;
408
++
409
++	mutex_lock(&hvsk->accept_queue_mutex);
410
++	ret = list_empty(&hvsk->accept_queue);
411
++	mutex_unlock(&hvsk->accept_queue_mutex);
412
++
413
++	return ret;
414
++}
415
++
416
++static void hvsock_addr_init(struct sockaddr_hv *addr, uuid_le service_id)
417
++{
418
++	memset(addr, 0, sizeof(*addr));
419
++	addr->shv_family = AF_HYPERV;
420
++	addr->shv_service_guid = service_id;
421
++}
422
++
423
++static int hvsock_addr_validate(const struct sockaddr_hv *addr)
424
++{
425
++	if (!addr)
426
++		return -EFAULT;
427
++
428
++	if (addr->shv_family != AF_HYPERV)
429
++		return -EAFNOSUPPORT;
430
++
431
++	if (addr->reserved != 0)
432
++		return -EINVAL;
433
++
434
++	return 0;
435
++}
436
++
437
++static bool hvsock_addr_bound(const struct sockaddr_hv *addr)
438
++{
439
++	return !!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY);
440
++}
441
++
442
++static int hvsock_addr_cast(const struct sockaddr *addr, size_t len,
443
++			    struct sockaddr_hv **out_addr)
444
++{
445
++	if (len < sizeof(**out_addr))
446
++		return -EFAULT;
447
++
448
++	*out_addr = (struct sockaddr_hv *)addr;
449
++	return hvsock_addr_validate(*out_addr);
450
++}
451
++
452
++static int __hvsock_do_bind(struct hvsock_sock *hvsk,
453
++			    struct sockaddr_hv *addr)
454
++{
455
++	struct sockaddr_hv hv_addr;
456
++	int ret = 0;
457
++
458
++	hvsock_addr_init(&hv_addr, addr->shv_service_guid);
459
++
460
++	mutex_lock(&hvsock_mutex);
461
++
462
++	if (!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY)) {
463
++		do {
464
++			uuid_le_gen(&hv_addr.shv_service_guid);
465
++		} while (hvsock_find_bound_socket(&hv_addr));
466
++	} else {
467
++		if (hvsock_find_bound_socket(&hv_addr)) {
468
++			ret = -EADDRINUSE;
469
++			goto out;
470
++		}
471
++	}
472
++
473
++	hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_guid);
474
++
475
++	sock_hold(&hvsk->sk);
476
++	list_add(&hvsk->bound_list, &hvsock_bound_list);
477
++out:
478
++	mutex_unlock(&hvsock_mutex);
479
++
480
++	return ret;
481
++}
482
++
483
++static int __hvsock_bind(struct sock *sk, struct sockaddr_hv *addr)
484
++{
485
++	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
486
++	int ret;
487
++
488
++	if (hvsock_addr_bound(&hvsk->local_addr))
489
++		return -EINVAL;
490
++
491
++	switch (sk->sk_socket->type) {
492
++	case SOCK_STREAM:
493
++		ret = __hvsock_do_bind(hvsk, addr);
494
++		break;
495
++
496
++	default:
497
++		ret = -EINVAL;
498
++		break;
499
++	}
500
++
501
++	return ret;
502
++}
503
++
504
++/* Autobind this socket to the local address if necessary. */
505
++static int hvsock_auto_bind(struct hvsock_sock *hvsk)
506
++{
507
++	struct sock *sk = hvsock_to_sk(hvsk);
508
++	struct sockaddr_hv local_addr;
509
++
510
++	if (hvsock_addr_bound(&hvsk->local_addr))
511
++		return 0;
512
++	hvsock_addr_init(&local_addr, SHV_SERVICE_ID_ANY);
513
++	return __hvsock_bind(sk, &local_addr);
514
++}
515
++
516
++static void hvsock_sk_destruct(struct sock *sk)
517
++{
518
++	struct vmbus_channel *channel;
519
++	struct hvsock_sock *hvsk;
520
++
521
++	hvsk = sk_to_hvsock(sk);
522
++	vfree(hvsk->send);
523
++	vfree(hvsk->recv);
524
++
525
++	channel = hvsk->channel;
526
++	if (!channel)
527
++		return;
528
++
529
++	vmbus_hvsock_device_unregister(channel);
530
++}
531
++
532
++static void __hvsock_release(struct sock *sk)
533
++{
534
++	struct hvsock_sock *hvsk;
535
++	struct sock *pending;
536
++
537
++	hvsk = sk_to_hvsock(sk);
538
++
539
++	mutex_lock(&hvsock_mutex);
540
++
541
++	if (!list_empty(&hvsk->bound_list)) {
542
++		list_del_init(&hvsk->bound_list);
543
++		sock_put(&hvsk->sk);
544
++	}
545
++
546
++	if (!list_empty(&hvsk->connected_list)) {
547
++		list_del_init(&hvsk->connected_list);
548
++		sock_put(&hvsk->sk);
549
++	}
550
++
551
++	mutex_unlock(&hvsock_mutex);
552
++
553
++	lock_sock(sk);
554
++	sock_orphan(sk);
555
++	sk->sk_shutdown = SHUTDOWN_MASK;
556
++
557
++	/* Clean up any sockets that never were accepted. */
558
++	while ((pending = hvsock_dequeue_accept(sk)) != NULL) {
559
++		__hvsock_release(pending);
560
++		sock_put(pending);
561
++	}
562
++
563
++	release_sock(sk);
564
++	sock_put(sk);
565
++}
566
++
567
++static int hvsock_release(struct socket *sock)
568
++{
569
++	/* If accept() is interrupted by a signal, the temporary socket
570
++	 * struct's sock->sk is NULL.
571
++	 */
572
++	if (sock->sk) {
573
++		__hvsock_release(sock->sk);
574
++		sock->sk = NULL;
575
++	}
576
++
577
++	sock->state = SS_FREE;
578
++	return 0;
579
++}
580
++
581
++static struct sock *hvsock_create(struct net *net, struct socket *sock,
582
++				  gfp_t priority, unsigned short type)
583
++{
584
++	struct hvsock_sock *hvsk;
585
++	struct sock *sk;
586
++
587
++	sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0);
588
++	if (!sk)
589
++		return NULL;
590
++
591
++	sock_init_data(sock, sk);
592
++
593
++	/* sk->sk_type is normally set in sock_init_data, but only if sock
594
++	 * is non-NULL. We make sure that our sockets always have a type by
595
++	 * setting it here if needed.
596
++	 */
597
++	if (!sock)
598
++		sk->sk_type = type;
599
++
600
++	sk->sk_destruct = hvsock_sk_destruct;
601
++
602
++	/* Looks stream-based socket doesn't need this. */
603
++	sk->sk_backlog_rcv = NULL;
604
++
605
++	sk->sk_state = 0;
606
++	sock_reset_flag(sk, SOCK_DONE);
607
++
608
++	hvsk = sk_to_hvsock(sk);
609
++
610
++	hvsk->send = NULL;
611
++	hvsk->recv = NULL;
612
++
613
++	hvsock_addr_init(&hvsk->local_addr, SHV_SERVICE_ID_ANY);
614
++	hvsock_addr_init(&hvsk->remote_addr, SHV_SERVICE_ID_ANY);
615
++
616
++	INIT_LIST_HEAD(&hvsk->bound_list);
617
++	INIT_LIST_HEAD(&hvsk->connected_list);
618
++
619
++	INIT_LIST_HEAD(&hvsk->accept_queue);
620
++	mutex_init(&hvsk->accept_queue_mutex);
621
++
622
++	hvsk->peer_shutdown = 0;
623
++
624
++	return sk;
625
++}
626
++
627
++static int hvsock_bind(struct socket *sock, struct sockaddr *addr,
628
++		       int addr_len)
629
++{
630
++	struct sockaddr_hv *hv_addr;
631
++	struct sock *sk;
632
++	int ret;
633
++
634
++	sk = sock->sk;
635
++
636
++	if (hvsock_addr_cast(addr, addr_len, &hv_addr) != 0)
637
++		return -EINVAL;
638
++
639
++	if (uuid_le_cmp(hv_addr->shv_vm_guid, NULL_UUID_LE))
640
++		return -EINVAL;
641
++
642
++	lock_sock(sk);
643
++	ret = __hvsock_bind(sk, hv_addr);
644
++	release_sock(sk);
645
++
646
++	return ret;
647
++}
648
++
649
++static int hvsock_getname(struct socket *sock,
650
++			  struct sockaddr *addr, int *addr_len, int peer)
651
++{
652
++	struct sockaddr_hv *hv_addr;
653
++	struct hvsock_sock *hvsk;
654
++	struct sock *sk;
655
++	int ret;
656
++
657
++	sk = sock->sk;
658
++	hvsk = sk_to_hvsock(sk);
659
++	ret = 0;
660
++
661
++	lock_sock(sk);
662
++
663
++	if (peer) {
664
++		if (sock->state != SS_CONNECTED) {
665
++			ret = -ENOTCONN;
666
++			goto out;
667
++		}
668
++		hv_addr = &hvsk->remote_addr;
669
++	} else {
670
++		hv_addr = &hvsk->local_addr;
671
++	}
672
++
673
++	__sockaddr_check_size(sizeof(*hv_addr));
674
++
675
++	memcpy(addr, hv_addr, sizeof(*hv_addr));
676
++	*addr_len = sizeof(*hv_addr);
677
++
678
++out:
679
++	release_sock(sk);
680
++	return ret;
681
++}
682
++
683
++static void get_ringbuffer_rw_status(struct vmbus_channel *channel,
684
++				     bool *can_read, bool *can_write)
685
++{
686
++	u32 avl_read_bytes, avl_write_bytes, dummy;
687
++
688
++	if (can_read) {
689
++		hv_get_ringbuffer_availbytes(&channel->inbound,
690
++					     &avl_read_bytes,
691
++					     &dummy);
692
++		/* 0-size payload means FIN */
693
++		*can_read = avl_read_bytes >= HVSOCK_PKT_LEN(0);
694
++	}
695
++
696
++	if (can_write) {
697
++		hv_get_ringbuffer_availbytes(&channel->outbound,
698
++					     &dummy,
699
++					     &avl_write_bytes);
700
++
701
++		/* We only write if there is enough space */
702
++		*can_write = avl_write_bytes > HVSOCK_PKT_LEN(PAGE_SIZE_4K);
703
++	}
704
++}
705
++
706
++static size_t get_ringbuffer_writable_bytes(struct vmbus_channel *channel)
707
++{
708
++	u32 avl_write_bytes, dummy;
709
++	size_t ret;
710
++
711
++	hv_get_ringbuffer_availbytes(&channel->outbound,
712
++				     &dummy,
713
++				     &avl_write_bytes);
714
++
715
++	/* The ringbuffer mustn't be 100% full, and we should reserve a
716
++	 * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
717
++	 * and hvsock_shutdown().
718
++	 */
719
++	if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0))
720
++		return 0;
721
++	ret = avl_write_bytes - HVSOCK_PKT_LEN(1) - HVSOCK_PKT_LEN(0);
722
++
723
++	return round_down(ret, 8);
724
++}
725
++
726
++static int hvsock_get_send_buf(struct hvsock_sock *hvsk)
727
++{
728
++	hvsk->send = vmalloc(sizeof(*hvsk->send));
729
++	return hvsk->send ? 0 : -ENOMEM;
730
++}
731
++
732
++static void hvsock_put_send_buf(struct hvsock_sock *hvsk)
733
++{
734
++	vfree(hvsk->send);
735
++	hvsk->send = NULL;
736
++}
737
++
738
++static int hvsock_send_data(struct vmbus_channel *channel,
739
++			    struct hvsock_sock *hvsk,
740
++			    size_t to_write)
741
++{
742
++	hvsk->send->hdr.pkt_type = 1;
743
++	hvsk->send->hdr.data_size = to_write;
744
++	return vmbus_sendpacket(channel, &hvsk->send->hdr,
745
++				sizeof(hvsk->send->hdr) + to_write,
746
++				0, VM_PKT_DATA_INBAND, 0);
747
++}
748
++
749
++static int hvsock_get_recv_buf(struct hvsock_sock *hvsk)
750
++{
751
++	hvsk->recv = vmalloc(sizeof(*hvsk->recv));
752
++	return hvsk->recv ? 0 : -ENOMEM;
753
++}
754
++
755
++static void hvsock_put_recv_buf(struct hvsock_sock *hvsk)
756
++{
757
++	vfree(hvsk->recv);
758
++	hvsk->recv = NULL;
759
++}
760
++
761
++static int hvsock_recv_data(struct vmbus_channel *channel,
762
++			    struct hvsock_sock *hvsk,
763
++			    size_t *payload_len)
764
++{
765
++	u32 buffer_actual_len;
766
++	u64 dummy_req_id;
767
++	int ret;
768
++
769
++	ret = vmbus_recvpacket(channel, &hvsk->recv->hdr,
770
++			       sizeof(hvsk->recv->hdr) +
771
++			       sizeof(hvsk->recv->buf),
772
++			       &buffer_actual_len, &dummy_req_id);
773
++	if (ret != 0 || buffer_actual_len <= sizeof(hvsk->recv->hdr))
774
++		*payload_len = 0;
775
++	else
776
++		*payload_len = hvsk->recv->hdr.data_size;
777
++
778
++	return ret;
779
++}
780
++
781
++static int hvsock_shutdown(struct socket *sock, int mode)
782
++{
783
++	struct hvsock_sock *hvsk;
784
++	struct sock *sk;
785
++	int ret = 0;
786
++
787
++	if (mode < SHUT_RD || mode > SHUT_RDWR)
788
++		return -EINVAL;
789
++	/* This maps:
790
++	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
791
++	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
792
++	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
793
++	 */
794
++	++mode;
795
++
796
++	if (sock->state != SS_CONNECTED)
797
++		return -ENOTCONN;
798
++
799
++	sock->state = SS_DISCONNECTING;
800
++
801
++	sk = sock->sk;
802
++
803
++	lock_sock(sk);
804
++
805
++	sk->sk_shutdown |= mode;
806
++	sk->sk_state_change(sk);
807
++
808
++	if (mode & SEND_SHUTDOWN) {
809
++		hvsk = sk_to_hvsock(sk);
810
++
811
++		ret = hvsock_get_send_buf(hvsk);
812
++		if (ret < 0)
813
++			goto out;
814
++
815
++		/* It can't fail: see get_ringbuffer_writable_bytes(). */
816
++		(void)hvsock_send_data(hvsk->channel, hvsk, 0);
817
++
818
++		hvsock_put_send_buf(hvsk);
819
++	}
820
++
821
++out:
822
++	release_sock(sk);
823
++
824
++	return ret;
825
++}
826
++
827
++static unsigned int hvsock_poll(struct file *file, struct socket *sock,
828
++				poll_table *wait)
829
++{
830
++	struct vmbus_channel *channel;
831
++	bool can_read, can_write;
832
++	struct hvsock_sock *hvsk;
833
++	unsigned int mask;
834
++	struct sock *sk;
835
++
836
++	sk = sock->sk;
837
++	hvsk = sk_to_hvsock(sk);
838
++
839
++	poll_wait(file, sk_sleep(sk), wait);
840
++	mask = 0;
841
++
842
++	if (sk->sk_err)
843
++		/* Signify that there has been an error on this socket. */
844
++		mask |= POLLERR;
845
++
846
++	/* INET sockets treat local write shutdown and peer write shutdown as a
847
++	 * case of POLLHUP set.
848
++	 */
849
++	if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
850
++	    ((sk->sk_shutdown & SEND_SHUTDOWN) &&
851
++	     (hvsk->peer_shutdown & SEND_SHUTDOWN))) {
852
++		mask |= POLLHUP;
853
++	}
854
++
855
++	if (sk->sk_shutdown & RCV_SHUTDOWN ||
856
++	    hvsk->peer_shutdown & SEND_SHUTDOWN) {
857
++		mask |= POLLRDHUP;
858
++	}
859
++
860
++	lock_sock(sk);
861
++
862
++	/* Listening sockets that have connections in their accept
863
++	 * queue can be read.
864
++	 */
865
++	if (sk->sk_state == SS_LISTEN && !hvsock_is_accept_queue_empty(sk))
866
++		mask |= POLLIN | POLLRDNORM;
867
++
868
++	/* The mutex is to against hvsock_open_connection() */
869
++	mutex_lock(&hvsock_mutex);
870
++
871
++	channel = hvsk->channel;
872
++	if (channel) {
873
++		/* If there is something in the queue then we can read */
874
++		get_ringbuffer_rw_status(channel, &can_read, &can_write);
875
++
876
++		if (!can_read && hvsk->recv)
877
++			can_read = true;
878
++
879
++		if (!(sk->sk_shutdown & RCV_SHUTDOWN) && can_read)
880
++			mask |= POLLIN | POLLRDNORM;
881
++	} else {
882
++		can_write = false;
883
++	}
884
++
885
++	mutex_unlock(&hvsock_mutex);
886
++
887
++	/* Sockets whose connections have been closed terminated should
888
++	 * also be considered read, and we check the shutdown flag for that.
889
++	 */
890
++	if (sk->sk_shutdown & RCV_SHUTDOWN ||
891
++	    hvsk->peer_shutdown & SEND_SHUTDOWN) {
892
++		mask |= POLLIN | POLLRDNORM;
893
++	}
894
++
895
++	/* Connected sockets that can produce data can be written. */
896
++	if (sk->sk_state == SS_CONNECTED && can_write &&
897
++	    !(sk->sk_shutdown & SEND_SHUTDOWN)) {
898
++		/* Remove POLLWRBAND since INET sockets are not setting it.
899
++		 */
900
++		mask |= POLLOUT | POLLWRNORM;
901
++	}
902
++
903
++	/* Simulate INET socket poll behaviors, which sets
904
++	 * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
905
++	 * but local send is not shutdown.
906
++	 */
907
++	if (sk->sk_state == SS_UNCONNECTED &&
908
++	    !(sk->sk_shutdown & SEND_SHUTDOWN))
909
++		mask |= POLLOUT | POLLWRNORM;
910
++
911
++	release_sock(sk);
912
++
913
++	return mask;
914
++}
915
++
916
++/* This function runs in the tasklet context of process_chn_event() */
917
++static void hvsock_on_channel_cb(void *ctx)
918
++{
919
++	struct sock *sk = (struct sock *)ctx;
920
++	struct vmbus_channel *channel;
921
++	struct hvsock_sock *hvsk;
922
++	bool can_read, can_write;
923
++
924
++	hvsk = sk_to_hvsock(sk);
925
++	channel = hvsk->channel;
926
++	BUG_ON(!channel);
927
++
928
++	get_ringbuffer_rw_status(channel, &can_read, &can_write);
929
++
930
++	if (can_read)
931
++		sk->sk_data_ready(sk);
932
++
933
++	if (can_write)
934
++		sk->sk_write_space(sk);
935
++}
936
++
937
++static void hvsock_close_connection(struct vmbus_channel *channel)
938
++{
939
++	struct hvsock_sock *hvsk;
940
++	struct sock *sk;
941
++
942
++	mutex_lock(&hvsock_mutex);
943
++
944
++	sk = hvsock_find_connected_socket_by_channel(channel);
945
++
946
++	/* The guest has already closed the connection? */
947
++	if (!sk)
948
++		goto out;
949
++
950
++	sk->sk_state = SS_UNCONNECTED;
951
++	sock_set_flag(sk, SOCK_DONE);
952
++
953
++	hvsk = sk_to_hvsock(sk);
954
++	hvsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
955
++
956
++	sk->sk_state_change(sk);
957
++out:
958
++	mutex_unlock(&hvsock_mutex);
959
++}
960
++
961
/* Handle a new channel offer: match it to a bound socket, open the VMBus
 * channel and move the socket (or, for host-initiated connections, a newly
 * created child socket) into the connected state.
 *
 * Returns 0 on success, -ENXIO when no suitable socket is found,
 * -ECONNREFUSED when the listener's backlog is full, -ENOMEM when the child
 * socket cannot be allocated, or the vmbus_open() error.
 */
static int hvsock_open_connection(struct vmbus_channel *channel)
{
	uuid_le *instance = &channel->offermsg.offer.if_instance;
	uuid_le *service_id = &channel->offermsg.offer.if_type;
	struct hvsock_sock *hvsk = NULL, *child_hvsk = NULL;
	struct sock *sk, *child = NULL;
	struct sockaddr_hv lookup;
	bool from_host;
	int ret;

	/* A non-zero first byte means the host initiated the connection. */
	from_host = channel->offermsg.offer.u.pipe.user_def[0] != 0;

	mutex_lock(&hvsock_mutex);

	hvsock_addr_init(&lookup, from_host ? *service_id : *instance);
	sk = hvsock_find_bound_socket(&lookup);

	/* Host-initiated offers need a listener; guest-initiated ones need
	 * a socket that is in the middle of connect().
	 */
	if (!sk || sk->sk_state != (from_host ? SS_LISTEN : SS_CONNECTING)) {
		ret = -ENXIO;
		goto out;
	}

	if (from_host) {
		if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
			ret = -ECONNREFUSED;
			goto out;
		}

		child = hvsock_create(sock_net(sk), NULL, GFP_KERNEL,
				      sk->sk_type);
		if (!child) {
			ret = -ENOMEM;
			goto out;
		}

		child->sk_state = SS_CONNECTING;
		child_hvsk = sk_to_hvsock(child);
		child_hvsk->channel = channel;
		hvsock_addr_init(&child_hvsk->local_addr, *service_id);
		hvsock_addr_init(&child_hvsk->remote_addr, *instance);
	} else {
		hvsk = sk_to_hvsock(sk);
		hvsk->channel = channel;
	}

	set_channel_read_state(channel, false);
	ret = vmbus_open(channel, RINGBUFFER_HVSOCK_SND_SIZE,
			 RINGBUFFER_HVSOCK_RCV_SIZE, NULL, 0,
			 hvsock_on_channel_cb, from_host ? child : sk);
	if (ret != 0) {
		/* Detach the channel again; the child socket is dropped. */
		if (from_host) {
			child_hvsk->channel = NULL;
			sock_put(child);
		} else {
			hvsk->channel = NULL;
		}
		goto out;
	}

	vmbus_set_chn_rescind_callback(channel, hvsock_close_connection);

	/* See get_ringbuffer_rw_status() for the matching threshold. */
	set_channel_pending_send_size(channel,
				      HVSOCK_PKT_LEN(PAGE_SIZE_4K) + 1);

	if (from_host) {
		child->sk_state = SS_CONNECTED;

		/* The connected list holds its own reference. */
		sock_hold(&child_hvsk->sk);
		list_add(&child_hvsk->connected_list, &hvsock_connected_list);

		hvsock_enqueue_accept(sk, child);
	} else {
		sk->sk_state = SS_CONNECTED;
		sk->sk_socket->state = SS_CONNECTED;

		/* The connected list holds its own reference. */
		sock_hold(&hvsk->sk);
		list_add(&hvsk->connected_list, &hvsock_connected_list);
	}

	sk->sk_state_change(sk);
out:
	mutex_unlock(&hvsock_mutex);
	return ret;
}
1050
++
1051
++static void hvsock_connect_timeout(struct work_struct *work)
1052
++{
1053
++	struct hvsock_sock *hvsk;
1054
++	struct sock *sk;
1055
++
1056
++	hvsk = container_of(work, struct hvsock_sock, dwork.work);
1057
++	sk = hvsock_to_sk(hvsk);
1058
++
1059
++	lock_sock(sk);
1060
++	if ((sk->sk_state == SS_CONNECTING) &&
1061
++	    (sk->sk_shutdown != SHUTDOWN_MASK)) {
1062
++		sk->sk_state = SS_UNCONNECTED;
1063
++		sk->sk_err = ETIMEDOUT;
1064
++		sk->sk_error_report(sk);
1065
++	}
1066
++	release_sock(sk);
1067
++
1068
++	sock_put(sk);
1069
++}
1070
++
1071
++static int hvsock_connect_wait(struct socket *sock,
1072
++			       int flags, int current_ret)
1073
++{
1074
++	struct sock *sk = sock->sk;
1075
++	struct hvsock_sock *hvsk;
1076
++	int ret = current_ret;
1077
++	DEFINE_WAIT(wait);
1078
++	long timeout;
1079
++
1080
++	hvsk = sk_to_hvsock(sk);
1081
++	timeout = HVSOCK_CONNECT_TIMEOUT;
1082
++	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1083
++
1084
++	while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
1085
++		if (flags & O_NONBLOCK) {
1086
++			/* If we're not going to block, we schedule a timeout
1087
++			 * function to generate a timeout on the connection
1088
++			 * attempt, in case the peer doesn't respond in a
1089
++			 * timely manner. We hold on to the socket until the
1090
++			 * timeout fires.
1091
++			 */
1092
++			sock_hold(sk);
1093
++			INIT_DELAYED_WORK(&hvsk->dwork,
1094
++					  hvsock_connect_timeout);
1095
++			schedule_delayed_work(&hvsk->dwork, timeout);
1096
++
1097
++			/* Skip ahead to preserve error code set above. */
1098
++			goto out_wait;
1099
++		}
1100
++
1101
++		release_sock(sk);
1102
++		timeout = schedule_timeout(timeout);
1103
++		lock_sock(sk);
1104
++
1105
++		if (signal_pending(current)) {
1106
++			ret = sock_intr_errno(timeout);
1107
++			goto out_wait_error;
1108
++		} else if (timeout == 0) {
1109
++			ret = -ETIMEDOUT;
1110
++			goto out_wait_error;
1111
++		}
1112
++
1113
++		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1114
++	}
1115
++
1116
++	ret = sk->sk_err ? -sk->sk_err : 0;
1117
++
1118
++out_wait_error:
1119
++	if (ret < 0) {
1120
++		sk->sk_state = SS_UNCONNECTED;
1121
++		sock->state = SS_UNCONNECTED;
1122
++	}
1123
++out_wait:
1124
++	finish_wait(sk_sleep(sk), &wait);
1125
++	return ret;
1126
++}
1127
++
1128
++static int hvsock_connect(struct socket *sock, struct sockaddr *addr,
1129
++			  int addr_len, int flags)
1130
++{
1131
++	struct sockaddr_hv *remote_addr;
1132
++	struct hvsock_sock *hvsk;
1133
++	struct sock *sk;
1134
++	int ret = 0;
1135
++
1136
++	sk = sock->sk;
1137
++	hvsk = sk_to_hvsock(sk);
1138
++
1139
++	lock_sock(sk);
1140
++
1141
++	switch (sock->state) {
1142
++	case SS_CONNECTED:
1143
++		ret = -EISCONN;
1144
++		goto out;
1145
++	case SS_DISCONNECTING:
1146
++		ret = -EINVAL;
1147
++		goto out;
1148
++	case SS_CONNECTING:
1149
++		/* This continues on so we can move sock into the SS_CONNECTED
1150
++		 * state once the connection has completed (at which point err
1151
++		 * will be set to zero also).  Otherwise, we will either wait
1152
++		 * for the connection or return -EALREADY should this be a
1153
++		 * non-blocking call.
1154
++		 */
1155
++		ret = -EALREADY;
1156
++		break;
1157
++	default:
1158
++		if ((sk->sk_state == SS_LISTEN) ||
1159
++		    hvsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
1160
++			ret = -EINVAL;
1161
++			goto out;
1162
++		}
1163
++
1164
++		/* Set the remote address that we are connecting to. */
1165
++		memcpy(&hvsk->remote_addr, remote_addr,
1166
++		       sizeof(hvsk->remote_addr));
1167
++
1168
++		ret = hvsock_auto_bind(hvsk);
1169
++		if (ret)
1170
++			goto out;
1171
++
1172
++		sk->sk_state = SS_CONNECTING;
1173
++
1174
++		ret = vmbus_send_tl_connect_request(
1175
++					&hvsk->local_addr.shv_service_guid,
1176
++					&hvsk->remote_addr.shv_service_guid);
1177
++		if (ret < 0)
1178
++			goto out;
1179
++
1180
++		/* Mark sock as connecting and set the error code to in
1181
++		 * progress in case this is a non-blocking connect.
1182
++		 */
1183
++		sock->state = SS_CONNECTING;
1184
++		ret = -EINPROGRESS;
1185
++	}
1186
++
1187
++	ret = hvsock_connect_wait(sock, flags, ret);
1188
++out:
1189
++	release_sock(sk);
1190
++	return ret;
1191
++}
1192
++
1193
++static int hvsock_accept_wait(struct sock *listener,
1194
++			      struct socket *newsock, int flags)
1195
++{
1196
++	struct hvsock_sock *hvconnected;
1197
++	struct sock *connected;
1198
++
1199
++	DEFINE_WAIT(wait);
1200
++	long timeout;
1201
++
1202
++	int ret = 0;
1203
++
1204
++	/* Wait for children sockets to appear; these are the new sockets
1205
++	 * created upon connection establishment.
1206
++	 */
1207
++	timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
1208
++	prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
1209
++
1210
++	while ((connected = hvsock_dequeue_accept(listener)) == NULL &&
1211
++	       listener->sk_err == 0) {
1212
++		release_sock(listener);
1213
++		timeout = schedule_timeout(timeout);
1214
++		lock_sock(listener);
1215
++
1216
++		if (signal_pending(current)) {
1217
++			ret = sock_intr_errno(timeout);
1218
++			goto out_wait;
1219
++		} else if (timeout == 0) {
1220
++			ret = -EAGAIN;
1221
++			goto out_wait;
1222
++		}
1223
++
1224
++		prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
1225
++	}
1226
++
1227
++	if (listener->sk_err)
1228
++		ret = -listener->sk_err;
1229
++
1230
++	if (connected) {
1231
++		lock_sock(connected);
1232
++		hvconnected = sk_to_hvsock(connected);
1233
++
1234
++		if (!ret) {
1235
++			newsock->state = SS_CONNECTED;
1236
++			sock_graft(connected, newsock);
1237
++		}
1238
++		release_sock(connected);
1239
++		sock_put(connected);
1240
++	}
1241
++
1242
++out_wait:
1243
++	finish_wait(sk_sleep(listener), &wait);
1244
++	return ret;
1245
++}
1246
++
1247
++static int hvsock_accept(struct socket *sock, struct socket *newsock,
1248
++			 int flags)
1249
++{
1250
++	struct sock *listener;
1251
++	int ret;
1252
++
1253
++	listener = sock->sk;
1254
++
1255
++	lock_sock(listener);
1256
++
1257
++	if (sock->type != SOCK_STREAM) {
1258
++		ret = -EOPNOTSUPP;
1259
++		goto out;
1260
++	}
1261
++
1262
++	if (listener->sk_state != SS_LISTEN) {
1263
++		ret = -EINVAL;
1264
++		goto out;
1265
++	}
1266
++
1267
++	ret = hvsock_accept_wait(listener, newsock, flags);
1268
++out:
1269
++	release_sock(listener);
1270
++	return ret;
1271
++}
1272
++
1273
++static int hvsock_listen(struct socket *sock, int backlog)
1274
++{
1275
++	struct hvsock_sock *hvsk;
1276
++	struct sock *sk;
1277
++	int ret = 0;
1278
++
1279
++	sk = sock->sk;
1280
++	lock_sock(sk);
1281
++
1282
++	if (sock->type != SOCK_STREAM) {
1283
++		ret = -EOPNOTSUPP;
1284
++		goto out;
1285
++	}
1286
++
1287
++	if (sock->state != SS_UNCONNECTED) {
1288
++		ret = -EINVAL;
1289
++		goto out;
1290
++	}
1291
++
1292
++	if (backlog <= 0) {
1293
++		ret = -EINVAL;
1294
++		goto out;
1295
++	}
1296
++	if (backlog > HVSOCK_MAX_BACKLOG)
1297
++		backlog = HVSOCK_MAX_BACKLOG;
1298
++
1299
++	hvsk = sk_to_hvsock(sk);
1300
++	if (!hvsock_addr_bound(&hvsk->local_addr)) {
1301
++		ret = -EINVAL;
1302
++		goto out;
1303
++	}
1304
++
1305
++	sk->sk_ack_backlog = 0;
1306
++	sk->sk_max_ack_backlog = backlog;
1307
++	sk->sk_state = SS_LISTEN;
1308
++out:
1309
++	release_sock(sk);
1310
++	return ret;
1311
++}
1312
++
1313
++static int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg,
1314
++			       size_t len)
1315
++{
1316
++	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
1317
++	struct vmbus_channel *channel;
1318
++	size_t total_to_write = len;
1319
++	size_t total_written = 0;
1320
++	DEFINE_WAIT(wait);
1321
++	bool can_write;
1322
++	long timeout;
1323
++	int ret = -EIO;
1324
++
1325
++	timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1326
++	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1327
++	channel = hvsk->channel;
1328
++
1329
++	while (total_to_write > 0) {
1330
++		size_t to_write, max_writable;
1331
++
1332
++		while (1) {
1333
++			get_ringbuffer_rw_status(channel, NULL, &can_write);
1334
++
1335
++			if (can_write || sk->sk_err != 0 ||
1336
++			    (sk->sk_shutdown & SEND_SHUTDOWN) ||
1337
++			    (hvsk->peer_shutdown & RCV_SHUTDOWN))
1338
++				break;
1339
++
1340
++			/* Don't wait for non-blocking sockets. */
1341
++			if (timeout == 0) {
1342
++				ret = -EAGAIN;
1343
++				goto out_wait;
1344
++			}
1345
++
1346
++			release_sock(sk);
1347
++
1348
++			timeout = schedule_timeout(timeout);
1349
++
1350
++			lock_sock(sk);
1351
++			if (signal_pending(current)) {
1352
++				ret = sock_intr_errno(timeout);
1353
++				goto out_wait;
1354
++			} else if (timeout == 0) {
1355
++				ret = -EAGAIN;
1356
++				goto out_wait;
1357
++			}
1358
++
1359
++			prepare_to_wait(sk_sleep(sk), &wait,
1360
++					TASK_INTERRUPTIBLE);
1361
++		}
1362
++
1363
++		/* These checks occur both as part of and after the loop
1364
++		 * conditional since we need to check before and after
1365
++		 * sleeping.
1366
++		 */
1367
++		if (sk->sk_err) {
1368
++			ret = -sk->sk_err;
1369
++			goto out_wait;
1370
++		} else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
1371
++			   (hvsk->peer_shutdown & RCV_SHUTDOWN)) {
1372
++			ret = -EPIPE;
1373
++			goto out_wait;
1374
++		}
1375
++
1376
++		/* Note: that write will only write as many bytes as possible
1377
++		 * in the ringbuffer. It is the caller's responsibility to
1378
++		 * check how many bytes we actually wrote.
1379
++		 */
1380
++		do {
1381
++			max_writable = get_ringbuffer_writable_bytes(channel);
1382
++			if (max_writable == 0)
1383
++				goto out_wait;
1384
++
1385
++			to_write = min_t(size_t, sizeof(hvsk->send->buf),
1386
++					 total_to_write);
1387
++			if (to_write > max_writable)
1388
++				to_write = max_writable;
1389
++
1390
++			ret = hvsock_get_send_buf(hvsk);
1391
++			if (ret < 0)
1392
++				goto out_wait;
1393
++
1394
++			ret = memcpy_from_msg(hvsk->send->buf, msg, to_write);
1395
++			if (ret != 0) {
1396
++				hvsock_put_send_buf(hvsk);
1397
++				goto out_wait;
1398
++			}
1399
++
1400
++			ret = hvsock_send_data(channel, hvsk, to_write);
1401
++			hvsock_put_send_buf(hvsk);
1402
++			if (ret != 0)
1403
++				goto out_wait;
1404
++
1405
++			total_written += to_write;
1406
++			total_to_write -= to_write;
1407
++		} while (total_to_write > 0);
1408
++	}
1409
++
1410
++out_wait:
1411
++	if (total_written > 0)
1412
++		ret = total_written;
1413
++
1414
++	finish_wait(sk_sleep(sk), &wait);
1415
++	return ret;
1416
++}
1417
++
1418
++static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg,
1419
++			  size_t len)
1420
++{
1421
++	struct hvsock_sock *hvsk;
1422
++	struct sock *sk;
1423
++	int ret;
1424
++
1425
++	if (len == 0)
1426
++		return -EINVAL;
1427
++
1428
++	if (msg->msg_flags & ~MSG_DONTWAIT)
1429
++		return -EOPNOTSUPP;
1430
++
1431
++	sk = sock->sk;
1432
++	hvsk = sk_to_hvsock(sk);
1433
++
1434
++	lock_sock(sk);
1435
++
1436
++	/* Callers should not provide a destination with stream sockets. */
1437
++	if (msg->msg_namelen) {
1438
++		ret = -EOPNOTSUPP;
1439
++		goto out;
1440
++	}
1441
++
1442
++	/* Send data only if both sides are not shutdown in the direction. */
1443
++	if (sk->sk_shutdown & SEND_SHUTDOWN ||
1444
++	    hvsk->peer_shutdown & RCV_SHUTDOWN) {
1445
++		ret = -EPIPE;
1446
++		goto out;
1447
++	}
1448
++
1449
++	if (sk->sk_state != SS_CONNECTED ||
1450
++	    !hvsock_addr_bound(&hvsk->local_addr)) {
1451
++		ret = -ENOTCONN;
1452
++		goto out;
1453
++	}
1454
++
1455
++	if (!hvsock_addr_bound(&hvsk->remote_addr)) {
1456
++		ret = -EDESTADDRREQ;
1457
++		goto out;
1458
++	}
1459
++
1460
++	ret = hvsock_sendmsg_wait(sk, msg, len);
1461
++out:
1462
++	release_sock(sk);
1463
++
1464
++	/* ret should be a bigger-than-0 total_written or a negative err
1465
++	 * code.
1466
++	 */
1467
++	BUG_ON(ret == 0);
1468
++
1469
++	return ret;
1470
++}
1471
++
1472
++static int hvsock_recvmsg_wait(struct sock *sk, struct msghdr *msg,
1473
++			       size_t len, int flags)
1474
++{
1475
++	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
1476
++	size_t to_read, total_to_read = len;
1477
++	struct vmbus_channel *channel;
1478
++	DEFINE_WAIT(wait);
1479
++	size_t copied = 0;
1480
++	bool can_read;
1481
++	long timeout;
1482
++	int ret = 0;
1483
++
1484
++	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1485
++	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1486
++	channel = hvsk->channel;
1487
++
1488
++	while (1) {
1489
++		bool need_refill = !hvsk->recv;
1490
++
1491
++		if (need_refill) {
1492
++			if (hvsk->peer_shutdown & SEND_SHUTDOWN)
1493
++				can_read = false;
1494
++			else
1495
++				get_ringbuffer_rw_status(channel, &can_read,
1496
++							 NULL);
1497
++		} else {
1498
++			can_read = true;
1499
++		}
1500
++
1501
++		if (can_read) {
1502
++			size_t payload_len;
1503
++
1504
++			if (need_refill) {
1505
++				ret = hvsock_get_recv_buf(hvsk);
1506
++				if (ret < 0) {
1507
++					if (copied > 0)
1508
++						ret = copied;
1509
++					goto out_wait;
1510
++				}
1511
++
1512
++				ret = hvsock_recv_data(channel, hvsk,
1513
++						       &payload_len);
1514
++				if (ret != 0 ||
1515
++				    payload_len > sizeof(hvsk->recv->buf)) {
1516
++					ret = -EIO;
1517
++					hvsock_put_recv_buf(hvsk);
1518
++					goto out_wait;
1519
++				}
1520
++
1521
++				if (payload_len == 0) {
1522
++					ret = copied;
1523
++					hvsock_put_recv_buf(hvsk);
1524
++					hvsk->peer_shutdown |= SEND_SHUTDOWN;
1525
++					break;
1526
++				}
1527
++
1528
++				hvsk->recv->data_len = payload_len;
1529
++				hvsk->recv->data_offset = 0;
1530
++			}
1531
++
1532
++			to_read = min_t(size_t, total_to_read,
1533
++					hvsk->recv->data_len);
1534
++
1535
++			ret = memcpy_to_msg(msg, hvsk->recv->buf +
1536
++					    hvsk->recv->data_offset,
1537
++					    to_read);
1538
++			if (ret != 0)
1539
++				break;
1540
++
1541
++			copied += to_read;
1542
++			total_to_read -= to_read;
1543
++
1544
++			hvsk->recv->data_len -= to_read;
1545
++
1546
++			if (hvsk->recv->data_len == 0)
1547
++				hvsock_put_recv_buf(hvsk);
1548
++			else
1549
++				hvsk->recv->data_offset += to_read;
1550
++
1551
++			if (total_to_read == 0)
1552
++				break;
1553
++		} else {
1554
++			if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) ||
1555
++			    (hvsk->peer_shutdown & SEND_SHUTDOWN))
1556
++				break;
1557
++
1558
++			/* Don't wait for non-blocking sockets. */
1559
++			if (timeout == 0) {
1560
++				ret = -EAGAIN;
1561
++				break;
1562
++			}
1563
++
1564
++			if (copied > 0)
1565
++				break;
1566
++
1567
++			release_sock(sk);
1568
++			timeout = schedule_timeout(timeout);
1569
++			lock_sock(sk);
1570
++
1571
++			if (signal_pending(current)) {
1572
++				ret = sock_intr_errno(timeout);
1573
++				break;
1574
++			} else if (timeout == 0) {
1575
++				ret = -EAGAIN;
1576
++				break;
1577
++			}
1578
++
1579
++			prepare_to_wait(sk_sleep(sk), &wait,
1580
++					TASK_INTERRUPTIBLE);
1581
++		}
1582
++	}
1583
++
1584
++	if (sk->sk_err)
1585
++		ret = -sk->sk_err;
1586
++	else if (sk->sk_shutdown & RCV_SHUTDOWN)
1587
++		ret = 0;
1588
++
1589
++	if (copied > 0)
1590
++		ret = copied;
1591
++out_wait:
1592
++	finish_wait(sk_sleep(sk), &wait);
1593
++	return ret;
1594
++}
1595
++
1596
++static int hvsock_recvmsg(struct socket *sock, struct msghdr *msg,
1597
++			  size_t len, int flags)
1598
++{
1599
++	struct sock *sk = sock->sk;
1600
++	int ret;
1601
++
1602
++	lock_sock(sk);
1603
++
1604
++	if (sk->sk_state != SS_CONNECTED) {
1605
++		/* Recvmsg is supposed to return 0 if a peer performs an
1606
++		 * orderly shutdown. Differentiate between that case and when a
1607
++		 * peer has not connected or a local shutdown occurred with the
1608
++		 * SOCK_DONE flag.
1609
++		 */
1610
++		if (sock_flag(sk, SOCK_DONE))
1611
++			ret = 0;
1612
++		else
1613
++			ret = -ENOTCONN;
1614
++
1615
++		goto out;
1616
++	}
1617
++
1618
++	/* We ignore msg->addr_name/len. */
1619
++	if (flags & ~MSG_DONTWAIT) {
1620
++		ret = -EOPNOTSUPP;
1621
++		goto out;
1622
++	}
1623
++
1624
++	/* We don't check peer_shutdown flag here since peer may actually shut
1625
++	 * down, but there can be data in the queue that a local socket can
1626
++	 * receive.
1627
++	 */
1628
++	if (sk->sk_shutdown & RCV_SHUTDOWN) {
1629
++		ret = 0;
1630
++		goto out;
1631
++	}
1632
++
1633
++	/* It is valid on Linux to pass in a zero-length receive buffer.  This
1634
++	 * is not an error.  We may as well bail out now.
1635
++	 */
1636
++	if (!len) {
1637
++		ret = 0;
1638
++		goto out;
1639
++	}
1640
++
1641
++	ret = hvsock_recvmsg_wait(sk, msg, len, flags);
1642
++out:
1643
++	release_sock(sk);
1644
++	return ret;
1645
++}
1646
++
1647
++static const struct proto_ops hvsock_ops = {
1648
++	.family = PF_HYPERV,
1649
++	.owner = THIS_MODULE,
1650
++	.release = hvsock_release,
1651
++	.bind = hvsock_bind,
1652
++	.connect = hvsock_connect,
1653
++	.socketpair = sock_no_socketpair,
1654
++	.accept = hvsock_accept,
1655
++	.getname = hvsock_getname,
1656
++	.poll = hvsock_poll,
1657
++	.ioctl = sock_no_ioctl,
1658
++	.listen = hvsock_listen,
1659
++	.shutdown = hvsock_shutdown,
1660
++	.setsockopt = sock_no_setsockopt,
1661
++	.getsockopt = sock_no_getsockopt,
1662
++	.sendmsg = hvsock_sendmsg,
1663
++	.recvmsg = hvsock_recvmsg,
1664
++	.mmap = sock_no_mmap,
1665
++	.sendpage = sock_no_sendpage,
1666
++};
1667
++
1668
++static int hvsock_create_sock(struct net *net, struct socket *sock,
1669
++			      int protocol, int kern)
1670
++{
1671
++	struct sock *sk;
1672
++
1673
++	if (protocol != 0 && protocol != SHV_PROTO_RAW)
1674
++		return -EPROTONOSUPPORT;
1675
++
1676
++	switch (sock->type) {
1677
++	case SOCK_STREAM:
1678
++		sock->ops = &hvsock_ops;
1679
++		break;
1680
++	default:
1681
++		return -ESOCKTNOSUPPORT;
1682
++	}
1683
++
1684
++	sock->state = SS_UNCONNECTED;
1685
++
1686
++	sk = hvsock_create(net, sock, GFP_KERNEL, 0);
1687
++	return sk ? 0 : -ENOMEM;
1688
++}
1689
++
1690
++static const struct net_proto_family hvsock_family_ops = {
1691
++	.family = AF_HYPERV,
1692
++	.create = hvsock_create_sock,
1693
++	.owner = THIS_MODULE,
1694
++};
1695
++
1696
++static int hvsock_probe(struct hv_device *hdev,
1697
++			const struct hv_vmbus_device_id *dev_id)
1698
++{
1699
++	struct vmbus_channel *channel = hdev->channel;
1700
++
1701
++	/* We ignore the error return code to suppress the unnecessary
1702
++	 * error message in vmbus_probe(): on error the host will rescind
1703
++	 * the offer in 30 seconds and we can do cleanup at that time.
1704
++	 */
1705
++	(void)hvsock_open_connection(channel);
1706
++
1707
++	return 0;
1708
++}
1709
++
1710
++static int hvsock_remove(struct hv_device *hdev)
1711
++{
1712
++	struct vmbus_channel *channel = hdev->channel;
1713
++
1714
++	vmbus_close(channel);
1715
++
1716
++	return 0;
1717
++}
1718
++
1719
++/* It's not really used. See vmbus_match() and vmbus_probe(). */
1720
++static const struct hv_vmbus_device_id id_table[] = {
1721
++	{},
1722
++};
1723
++
1724
++static struct hv_driver hvsock_drv = {
1725
++	.name		= "hv_sock",
1726
++	.hvsock		= true,
1727
++	.id_table	= id_table,
1728
++	.probe		= hvsock_probe,
1729
++	.remove		= hvsock_remove,
1730
++};
1731
++
1732
++static int __init hvsock_init(void)
1733
++{
1734
++	int ret;
1735
++
1736
++	if (vmbus_proto_version < VERSION_WIN10)
1737
++		return -ENODEV;
1738
++
1739
++	ret = vmbus_driver_register(&hvsock_drv);
1740
++	if (ret) {
1741
++		pr_err("failed to register hv_sock driver\n");
1742
++		return ret;
1743
++	}
1744
++
1745
++	ret = proto_register(&hvsock_proto, 0);
1746
++	if (ret) {
1747
++		pr_err("failed to register protocol\n");
1748
++		goto unreg_hvsock_drv;
1749
++	}
1750
++
1751
++	ret = sock_register(&hvsock_family_ops);
1752
++	if (ret) {
1753
++		pr_err("failed to register address family\n");
1754
++		goto unreg_proto;
1755
++	}
1756
++
1757
++	return 0;
1758
++
1759
++unreg_proto:
1760
++	proto_unregister(&hvsock_proto);
1761
++unreg_hvsock_drv:
1762
++	vmbus_driver_unregister(&hvsock_drv);
1763
++	return ret;
1764
++}
1765
++
1766
++static void __exit hvsock_exit(void)
1767
++{
1768
++	sock_unregister(AF_HYPERV);
1769
++	proto_unregister(&hvsock_proto);
1770
++	vmbus_driver_unregister(&hvsock_drv);
1771
++}
1772
++
1773
++module_init(hvsock_init);
1774
++module_exit(hvsock_exit);
1775
++
1776
++MODULE_DESCRIPTION("Hyper-V Sockets");
1777
++MODULE_LICENSE("Dual BSD/GPL");
1778
+-- 
1779
+2.13.0
1780
+
... ...
@@ -1378,6 +1378,7 @@ CONFIG_VSOCKETS=m
1378 1378
 CONFIG_VMWARE_VMCI_VSOCKETS=m
1379 1379
 CONFIG_VIRTIO_VSOCKETS=m
1380 1380
 CONFIG_VIRTIO_VSOCKETS_COMMON=m
1381
+CONFIG_HYPERV_SOCK=m
1381 1382
 CONFIG_NETLINK_DIAG=m
1382 1383
 CONFIG_MPLS=y
1383 1384
 CONFIG_NET_MPLS_GSO=m
... ...
@@ -1347,6 +1347,7 @@ CONFIG_VSOCKETS=m
1347 1347
 CONFIG_VMWARE_VMCI_VSOCKETS=m
1348 1348
 CONFIG_VIRTIO_VSOCKETS=m
1349 1349
 CONFIG_VIRTIO_VSOCKETS_COMMON=m
1350
+CONFIG_HYPERV_SOCK=m
1350 1351
 CONFIG_NETLINK_DIAG=m
1351 1352
 CONFIG_MPLS=y
1352 1353
 CONFIG_NET_MPLS_GSO=m
... ...
@@ -2,7 +2,7 @@
2 2
 Summary:        Kernel
3 3
 Name:           linux-secure
4 4
 Version:        4.9.38
5
-Release:        3%{?dist}
5
+Release:        4%{?dist}
6 6
 License:        GPLv2
7 7
 URL:            http://www.kernel.org/
8 8
 Group:          System Environment/Kernel
... ...
@@ -31,8 +31,20 @@ Patch12:        x86-vmware-sta.patch
31 31
 Patch13:        0001-NOWRITEEXEC-and-PAX-features-MPROTECT-EMUTRAMP.patch
32 32
 Patch14:        0002-Added-rap_plugin.patch
33 33
 Patch15:        0003-Added-PAX_RANDKSTACK.patch
34
+# Hyper-V patches
35
+Patch16:        0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch
36
+Patch17:        0005-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch
37
+Patch18:        0006-Drivers-hv-vss-Improve-log-messages.patch
38
+Patch19:        0007-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch
39
+Patch20:        0008-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch
40
+Patch21:        0009-Drivers-hv-Log-the-negotiated-IC-versions.patch
41
+Patch22:        0010-vmbus-fix-missed-ring-events-on-boot.patch
42
+Patch23:        0011-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch
43
+Patch24:        0012-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch
44
+Patch25:        0013-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch
45
+Patch26:        0014-hv_sock-introduce-Hyper-V-Sockets.patch
34 46
 # NSX requirements (should be removed)
35
-Patch16:        LKCM.patch
47
+Patch99:        LKCM.patch
36 48
 BuildRequires:  bc
37 49
 BuildRequires:  kbd
38 50
 BuildRequires:  kmod-devel
... ...
@@ -113,8 +125,19 @@ EOF
113 113
 %patch13 -p1
114 114
 %patch14 -p1
115 115
 %patch15 -p1
116
+%patch16 -p1
117
+%patch17 -p1
+%patch18 -p1
118
+%patch19 -p1
119
+%patch20 -p1
120
+%patch21 -p1
121
+%patch22 -p1
122
+%patch23 -p1
123
+%patch24 -p1
124
+%patch25 -p1
125
+%patch26 -p1
126
+
116 127
 pushd ..
117
-%patch16 -p0
128
+%patch99 -p0
118 129
 popd
119 130
 
120 131
 %build
... ...
@@ -180,7 +203,7 @@ EOF
180 180
 # Register myself to initramfs
181 181
 mkdir -p %{buildroot}/%{_localstatedir}/lib/initramfs/kernel
182 182
 cat > %{buildroot}/%{_localstatedir}/lib/initramfs/kernel/%{uname_r} << "EOF"
183
+--add-drivers "tmem xen-acpi-processor xen-evtchn xen-gntalloc xen-gntdev xen-privcmd xen-pciback xenfs hv_utils hv_vmbus hv_storvsc hv_netvsc hv_sock hv_balloon cn"
183 184
 EOF
184 185
 
185 186
 # cleanup dangling symlinks
... ...
@@ -227,6 +250,8 @@ ln -sf linux-%{uname_r}.cfg /boot/photon.cfg
227 227
 /usr/src/linux-headers-%{uname_r}
228 228
 
229 229
 %changelog
230
+*   Fri Jul 21 2017 Anish Swaminathan <anishs@vmware.com> 4.9.38-4
231
+-   Add Hyper-V patches (vmbus, utils and vss fixes; hv_sock support)
230 232
 *   Fri Jul 21 2017 Anish Swaminathan <anishs@vmware.com> 4.9.38-3
231 233
 -   Add missing hyperv drivers
232 234
 *   Thu Jul 20 2017 Alexey Makhalov <amakhalov@vmware.com> 4.9.38-2
... ...
@@ -2,7 +2,7 @@
2 2
 Summary:        Kernel
3 3
 Name:           linux
4 4
 Version:        4.9.38
5
-Release:        3%{?dist}
5
+Release:        4%{?dist}
6 6
 License:    	GPLv2
7 7
 URL:        	http://www.kernel.org/
8 8
 Group:        	System Environment/Kernel
... ...
@@ -28,6 +28,19 @@ Patch9:         SUNRPC-Do-not-reuse-srcport-for-TIME_WAIT-socket.patch
28 28
 Patch10:        SUNRPC-xs_bind-uses-ip_local_reserved_ports.patch
29 29
 Patch11:        net-9p-vsock.patch
30 30
 Patch12:        x86-vmware-sta.patch
31
+# Hyper-V patches
32
+Patch13:        0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch
33
+Patch14:        0005-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch
34
+Patch15:        0006-Drivers-hv-vss-Improve-log-messages.patch
35
+Patch16:        0007-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch
36
+Patch17:        0008-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch
37
+Patch18:        0009-Drivers-hv-Log-the-negotiated-IC-versions.patch
38
+Patch19:        0010-vmbus-fix-missed-ring-events-on-boot.patch
39
+Patch20:        0011-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch
40
+Patch21:        0012-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch
41
+Patch22:        0013-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch
42
+Patch23:        0014-hv_sock-introduce-Hyper-V-Sockets.patch
43
+
31 44
 BuildRequires:  bc
32 45
 BuildRequires:  kbd
33 46
 BuildRequires:  kmod-devel
... ...
@@ -108,6 +121,16 @@ This package contains the 'perf' performance analysis tools for Linux kernel.
108 108
 %patch10 -p1
109 109
 %patch11 -p1
110 110
 %patch12 -p1
111
+%patch13 -p1
112
+%patch14 -p1
113
+%patch15 -p1
114
+%patch16 -p1
115
+%patch17 -p1
+%patch18 -p1
116
+%patch19 -p1
117
+%patch20 -p1
118
+%patch21 -p1
119
+%patch22 -p1
120
+%patch23 -p1
111 121
 
112 122
 %build
113 123
 make mrproper
... ...
@@ -180,7 +203,7 @@ EOF
180 180
 # Register myself to initramfs
181 181
 mkdir -p %{buildroot}/%{_localstatedir}/lib/initramfs/kernel
182 182
 cat > %{buildroot}/%{_localstatedir}/lib/initramfs/kernel/%{uname_r} << "EOF"
183
+--add-drivers "tmem xen-acpi-processor xen-evtchn xen-gntalloc xen-gntdev xen-privcmd xen-pciback xenfs hv_utils hv_vmbus hv_storvsc hv_netvsc hv_sock hv_balloon cn"
183 184
 EOF
184 185
 
185 186
 #    Cleanup dangling symlinks
... ...
@@ -267,6 +290,8 @@ ln -sf %{name}-%{uname_r}.cfg /boot/photon.cfg
267 267
 /usr/share/doc/*
268 268
 
269 269
 %changelog
270
+*   Fri Jul 21 2017 Anish Swaminathan <anishs@vmware.com> 4.9.38-4
271
+-   Add Hyper-V patches (vmbus, utils and vss fixes; hv_sock support)
270 272
 *   Fri Jul 21 2017 Anish Swaminathan <anishs@vmware.com> 4.9.38-3
271 273
 -   Add missing hyperv drivers
272 274
 *   Thu Jul 20 2017 Alexey Makhalov <amakhalov@vmware.com> 4.9.38-2