Browse code

vendor: github.com/moby/ipvs v1.1.0

full diff: https://github.com/moby/ipvs/compare/v1.0.2...v1.1.0

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>

Sebastiaan van Stijn authored on 2023/01/15 01:25:22
Showing 12 changed files
... ...
@@ -57,7 +57,7 @@ require (
57 57
 	github.com/miekg/dns v1.1.43
58 58
 	github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible
59 59
 	github.com/moby/buildkit v0.10.6
60
-	github.com/moby/ipvs v1.0.2
60
+	github.com/moby/ipvs v1.1.0
61 61
 	github.com/moby/locker v1.0.1
62 62
 	github.com/moby/patternmatcher v0.5.0
63 63
 	github.com/moby/pubsub v1.0.0
... ...
@@ -807,8 +807,8 @@ github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh
807 807
 github.com/mitchellh/osext v0.0.0-20151018003038-5e2d6d41470f/go.mod h1:OkQIRizQZAeMln+1tSwduZz7+Af5oFlKirV/MSYes2A=
808 808
 github.com/moby/buildkit v0.10.6 h1:DJlEuLIgnu34HQKF4n9Eg6q2YqQVC0eOpMb4p2eRS2w=
809 809
 github.com/moby/buildkit v0.10.6/go.mod h1:tQuuyTWtOb9D+RE425cwOCUkX0/oZ+5iBZ+uWpWQ9bU=
810
-github.com/moby/ipvs v1.0.2 h1:NSbzuRTvfneftLU3VwPU5QuA6NZ0IUmqq9+VHcQxqHw=
811
-github.com/moby/ipvs v1.0.2/go.mod h1:2pngiyseZbIKXNv7hsKj3O9UEz30c53MT9005gt2hxQ=
810
+github.com/moby/ipvs v1.1.0 h1:ONN4pGaZQgAx+1Scz5RvWV4Q7Gb+mvfRh3NsPS+1XQQ=
811
+github.com/moby/ipvs v1.1.0/go.mod h1:4VJMWuf098bsUMmZEiD4Tjk/O7mOn3l1PTD3s4OoYAs=
812 812
 github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
813 813
 github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc=
814 814
 github.com/moby/patternmatcher v0.5.0 h1:YCZgJOeULcxLw1Q+sVR636pmS7sPEn1Qo2iAN6M7DBo=
815 815
new file mode 100644
... ...
@@ -0,0 +1,8 @@
0
+linters:
1
+  disable-all: true
2
+  enable:
3
+    - gofmt
4
+    - govet
5
+    - ineffassign
6
+    - misspell
7
+    - revive
... ...
@@ -1,4 +1,5 @@
1
-Apache License
1
+
2
+                                 Apache License
2 3
                            Version 2.0, January 2004
3 4
                         http://www.apache.org/licenses/
4 5
 
... ...
@@ -178,7 +179,7 @@ Apache License
178 178
    APPENDIX: How to apply the Apache License to your work.
179 179
 
180 180
       To apply the Apache License to your work, attach the following
181
-      boilerplate notice, with the fields enclosed by brackets "{}"
181
+      boilerplate notice, with the fields enclosed by brackets "[]"
182 182
       replaced with your own identifying information. (Don't include
183 183
       the brackets!)  The text should be enclosed in the appropriate
184 184
       comment syntax for the file format. We also recommend that a
... ...
@@ -186,7 +187,7 @@ Apache License
186 186
       same "printed page" as the copyright notice for easier
187 187
       identification within third-party archives.
188 188
 
189
-   Copyright {yyyy} {name of copyright owner}
189
+   Copyright [yyyy] [name of copyright owner]
190 190
 
191 191
    Licensed under the Apache License, Version 2.0 (the "License");
192 192
    you may not use this file except in compliance with the License.
... ...
@@ -199,4 +200,3 @@ Apache License
199 199
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 200
    See the License for the specific language governing permissions and
201 201
    limitations under the License.
202
-
... ...
@@ -31,4 +31,5 @@ func main() {
31 31
 Want to hack on ipvs? [Docker's contributions guidelines](https://github.com/docker/docker/blob/master/CONTRIBUTING.md) apply.
32 32
 
33 33
 ## Copyright and license
34
-Code and documentation copyright 2015 Docker, inc. Code released under the Apache 2.0 license. Docs released under Creative commons.
34
+
35
+Copyright 2015 Docker, inc. Code released under the [Apache 2.0 license](LICENSE).
35 36
deleted file mode 100644
... ...
@@ -1,178 +0,0 @@
1
-// +build linux
2
-
3
-package ipvs
4
-
5
-const (
6
-	genlCtrlID = 0x10
7
-)
8
-
9
-// GENL control commands
10
-const (
11
-	genlCtrlCmdUnspec uint8 = iota
12
-	genlCtrlCmdNewFamily
13
-	genlCtrlCmdDelFamily
14
-	genlCtrlCmdGetFamily
15
-)
16
-
17
-// GENL family attributes
18
-const (
19
-	genlCtrlAttrUnspec int = iota
20
-	genlCtrlAttrFamilyID
21
-	genlCtrlAttrFamilyName
22
-)
23
-
24
-// IPVS genl commands
25
-const (
26
-	ipvsCmdUnspec uint8 = iota
27
-	ipvsCmdNewService
28
-	ipvsCmdSetService
29
-	ipvsCmdDelService
30
-	ipvsCmdGetService
31
-	ipvsCmdNewDest
32
-	ipvsCmdSetDest
33
-	ipvsCmdDelDest
34
-	ipvsCmdGetDest
35
-	ipvsCmdNewDaemon
36
-	ipvsCmdDelDaemon
37
-	ipvsCmdGetDaemon
38
-	ipvsCmdSetConfig
39
-	ipvsCmdGetConfig
40
-	ipvsCmdSetInfo
41
-	ipvsCmdGetInfo
42
-	ipvsCmdZero
43
-	ipvsCmdFlush
44
-)
45
-
46
-// Attributes used in the first level of commands
47
-const (
48
-	ipvsCmdAttrUnspec int = iota
49
-	ipvsCmdAttrService
50
-	ipvsCmdAttrDest
51
-	ipvsCmdAttrDaemon
52
-	ipvsCmdAttrTimeoutTCP
53
-	ipvsCmdAttrTimeoutTCPFin
54
-	ipvsCmdAttrTimeoutUDP
55
-)
56
-
57
-// Attributes used to describe a service. Used inside nested attribute
58
-// ipvsCmdAttrService
59
-const (
60
-	ipvsSvcAttrUnspec int = iota
61
-	ipvsSvcAttrAddressFamily
62
-	ipvsSvcAttrProtocol
63
-	ipvsSvcAttrAddress
64
-	ipvsSvcAttrPort
65
-	ipvsSvcAttrFWMark
66
-	ipvsSvcAttrSchedName
67
-	ipvsSvcAttrFlags
68
-	ipvsSvcAttrTimeout
69
-	ipvsSvcAttrNetmask
70
-	ipvsSvcAttrStats
71
-	ipvsSvcAttrPEName
72
-)
73
-
74
-// Attributes used to describe a destination (real server). Used
75
-// inside nested attribute ipvsCmdAttrDest.
76
-const (
77
-	ipvsDestAttrUnspec int = iota
78
-	ipvsDestAttrAddress
79
-	ipvsDestAttrPort
80
-	ipvsDestAttrForwardingMethod
81
-	ipvsDestAttrWeight
82
-	ipvsDestAttrUpperThreshold
83
-	ipvsDestAttrLowerThreshold
84
-	ipvsDestAttrActiveConnections
85
-	ipvsDestAttrInactiveConnections
86
-	ipvsDestAttrPersistentConnections
87
-	ipvsDestAttrStats
88
-	ipvsDestAttrAddressFamily
89
-)
90
-
91
-// IPVS Statistics constants
92
-
93
-const (
94
-	ipvsStatsUnspec int = iota
95
-	ipvsStatsConns
96
-	ipvsStatsPktsIn
97
-	ipvsStatsPktsOut
98
-	ipvsStatsBytesIn
99
-	ipvsStatsBytesOut
100
-	ipvsStatsCPS
101
-	ipvsStatsPPSIn
102
-	ipvsStatsPPSOut
103
-	ipvsStatsBPSIn
104
-	ipvsStatsBPSOut
105
-)
106
-
107
-// Destination forwarding methods
108
-const (
109
-	// ConnectionFlagFwdmask indicates the mask in the connection
110
-	// flags which is used by forwarding method bits.
111
-	ConnectionFlagFwdMask = 0x0007
112
-
113
-	// ConnectionFlagMasq is used for masquerade forwarding method.
114
-	ConnectionFlagMasq = 0x0000
115
-
116
-	// ConnectionFlagLocalNode is used for local node forwarding
117
-	// method.
118
-	ConnectionFlagLocalNode = 0x0001
119
-
120
-	// ConnectionFlagTunnel is used for tunnel mode forwarding
121
-	// method.
122
-	ConnectionFlagTunnel = 0x0002
123
-
124
-	// ConnectionFlagDirectRoute is used for direct routing
125
-	// forwarding method.
126
-	ConnectionFlagDirectRoute = 0x0003
127
-)
128
-
129
-const (
130
-	// RoundRobin distributes jobs equally amongst the available
131
-	// real servers.
132
-	RoundRobin = "rr"
133
-
134
-	// LeastConnection assigns more jobs to real servers with
135
-	// fewer active jobs.
136
-	LeastConnection = "lc"
137
-
138
-	// DestinationHashing assigns jobs to servers through looking
139
-	// up a statically assigned hash table by their destination IP
140
-	// addresses.
141
-	DestinationHashing = "dh"
142
-
143
-	// SourceHashing assigns jobs to servers through looking up
144
-	// a statically assigned hash table by their source IP
145
-	// addresses.
146
-	SourceHashing = "sh"
147
-
148
-	// WeightedRoundRobin assigns jobs to real servers proportionally
149
-	// to there real servers' weight. Servers with higher weights
150
-	// receive new jobs first and get more jobs than servers
151
-	// with lower weights. Servers with equal weights get
152
-	// an equal distribution of new jobs
153
-	WeightedRoundRobin = "wrr"
154
-
155
-	// WeightedLeastConnection assigns more jobs to servers
156
-	// with fewer jobs and relative to the real servers' weight
157
-	WeightedLeastConnection = "wlc"
158
-)
159
-
160
-const (
161
-	// ConnFwdMask is a mask for the fwd methods
162
-	ConnFwdMask = 0x0007
163
-
164
-	// ConnFwdMasq denotes forwarding via masquerading/NAT
165
-	ConnFwdMasq = 0x0000
166
-
167
-	// ConnFwdLocalNode denotes forwarding to a local node
168
-	ConnFwdLocalNode = 0x0001
169
-
170
-	// ConnFwdTunnel denotes forwarding via a tunnel
171
-	ConnFwdTunnel = 0x0002
172
-
173
-	// ConnFwdDirectRoute denotes forwarding via direct routing
174
-	ConnFwdDirectRoute = 0x0003
175
-
176
-	// ConnFwdBypass denotes forwarding while bypassing the cache
177
-	ConnFwdBypass = 0x0004
178
-)
179 1
new file mode 100644
... ...
@@ -0,0 +1,176 @@
0
+package ipvs
1
+
2
+const (
3
+	genlCtrlID = 0x10
4
+)
5
+
6
+// GENL control commands
7
+const (
8
+	genlCtrlCmdUnspec uint8 = iota
9
+	genlCtrlCmdNewFamily
10
+	genlCtrlCmdDelFamily
11
+	genlCtrlCmdGetFamily
12
+)
13
+
14
+// GENL family attributes
15
+const (
16
+	genlCtrlAttrUnspec int = iota
17
+	genlCtrlAttrFamilyID
18
+	genlCtrlAttrFamilyName
19
+)
20
+
21
+// IPVS genl commands
22
+const (
23
+	ipvsCmdUnspec uint8 = iota
24
+	ipvsCmdNewService
25
+	ipvsCmdSetService
26
+	ipvsCmdDelService
27
+	ipvsCmdGetService
28
+	ipvsCmdNewDest
29
+	ipvsCmdSetDest
30
+	ipvsCmdDelDest
31
+	ipvsCmdGetDest
32
+	ipvsCmdNewDaemon
33
+	ipvsCmdDelDaemon
34
+	ipvsCmdGetDaemon
35
+	ipvsCmdSetConfig
36
+	ipvsCmdGetConfig
37
+	ipvsCmdSetInfo
38
+	ipvsCmdGetInfo
39
+	ipvsCmdZero
40
+	ipvsCmdFlush
41
+)
42
+
43
+// Attributes used in the first level of commands
44
+const (
45
+	ipvsCmdAttrUnspec int = iota
46
+	ipvsCmdAttrService
47
+	ipvsCmdAttrDest
48
+	ipvsCmdAttrDaemon
49
+	ipvsCmdAttrTimeoutTCP
50
+	ipvsCmdAttrTimeoutTCPFin
51
+	ipvsCmdAttrTimeoutUDP
52
+)
53
+
54
+// Attributes used to describe a service. Used inside nested attribute
55
+// ipvsCmdAttrService
56
+const (
57
+	ipvsSvcAttrUnspec int = iota
58
+	ipvsSvcAttrAddressFamily
59
+	ipvsSvcAttrProtocol
60
+	ipvsSvcAttrAddress
61
+	ipvsSvcAttrPort
62
+	ipvsSvcAttrFWMark
63
+	ipvsSvcAttrSchedName
64
+	ipvsSvcAttrFlags
65
+	ipvsSvcAttrTimeout
66
+	ipvsSvcAttrNetmask
67
+	ipvsSvcAttrStats
68
+	ipvsSvcAttrPEName
69
+)
70
+
71
+// Attributes used to describe a destination (real server). Used
72
+// inside nested attribute ipvsCmdAttrDest.
73
+const (
74
+	ipvsDestAttrUnspec int = iota
75
+	ipvsDestAttrAddress
76
+	ipvsDestAttrPort
77
+	ipvsDestAttrForwardingMethod
78
+	ipvsDestAttrWeight
79
+	ipvsDestAttrUpperThreshold
80
+	ipvsDestAttrLowerThreshold
81
+	ipvsDestAttrActiveConnections
82
+	ipvsDestAttrInactiveConnections
83
+	ipvsDestAttrPersistentConnections
84
+	ipvsDestAttrStats
85
+	ipvsDestAttrAddressFamily
86
+)
87
+
88
+// IPVS Statistics constants
89
+
90
+const (
91
+	ipvsStatsUnspec int = iota
92
+	ipvsStatsConns
93
+	ipvsStatsPktsIn
94
+	ipvsStatsPktsOut
95
+	ipvsStatsBytesIn
96
+	ipvsStatsBytesOut
97
+	ipvsStatsCPS
98
+	ipvsStatsPPSIn
99
+	ipvsStatsPPSOut
100
+	ipvsStatsBPSIn
101
+	ipvsStatsBPSOut
102
+)
103
+
104
+// Destination forwarding methods
105
+const (
106
+	// ConnectionFlagFwdmask indicates the mask in the connection
107
+	// flags which is used by forwarding method bits.
108
+	ConnectionFlagFwdMask = 0x0007
109
+
110
+	// ConnectionFlagMasq is used for masquerade forwarding method.
111
+	ConnectionFlagMasq = 0x0000
112
+
113
+	// ConnectionFlagLocalNode is used for local node forwarding
114
+	// method.
115
+	ConnectionFlagLocalNode = 0x0001
116
+
117
+	// ConnectionFlagTunnel is used for tunnel mode forwarding
118
+	// method.
119
+	ConnectionFlagTunnel = 0x0002
120
+
121
+	// ConnectionFlagDirectRoute is used for direct routing
122
+	// forwarding method.
123
+	ConnectionFlagDirectRoute = 0x0003
124
+)
125
+
126
+const (
127
+	// RoundRobin distributes jobs equally amongst the available
128
+	// real servers.
129
+	RoundRobin = "rr"
130
+
131
+	// LeastConnection assigns more jobs to real servers with
132
+	// fewer active jobs.
133
+	LeastConnection = "lc"
134
+
135
+	// DestinationHashing assigns jobs to servers through looking
136
+	// up a statically assigned hash table by their destination IP
137
+	// addresses.
138
+	DestinationHashing = "dh"
139
+
140
+	// SourceHashing assigns jobs to servers through looking up
141
+	// a statically assigned hash table by their source IP
142
+	// addresses.
143
+	SourceHashing = "sh"
144
+
145
+	// WeightedRoundRobin assigns jobs to real servers proportionally
146
+	// to there real servers' weight. Servers with higher weights
147
+	// receive new jobs first and get more jobs than servers
148
+	// with lower weights. Servers with equal weights get
149
+	// an equal distribution of new jobs
150
+	WeightedRoundRobin = "wrr"
151
+
152
+	// WeightedLeastConnection assigns more jobs to servers
153
+	// with fewer jobs and relative to the real servers' weight
154
+	WeightedLeastConnection = "wlc"
155
+)
156
+
157
+const (
158
+	// ConnFwdMask is a mask for the fwd methods
159
+	ConnFwdMask = 0x0007
160
+
161
+	// ConnFwdMasq denotes forwarding via masquerading/NAT
162
+	ConnFwdMasq = 0x0000
163
+
164
+	// ConnFwdLocalNode denotes forwarding to a local node
165
+	ConnFwdLocalNode = 0x0001
166
+
167
+	// ConnFwdTunnel denotes forwarding via a tunnel
168
+	ConnFwdTunnel = 0x0002
169
+
170
+	// ConnFwdDirectRoute denotes forwarding via direct routing
171
+	ConnFwdDirectRoute = 0x0003
172
+
173
+	// ConnFwdBypass denotes forwarding while bypassing the cache
174
+	ConnFwdBypass = 0x0004
175
+)
0 176
deleted file mode 100644
... ...
@@ -1,206 +0,0 @@
1
-// +build linux
2
-
3
-package ipvs
4
-
5
-import (
6
-	"fmt"
7
-	"net"
8
-	"time"
9
-
10
-	"github.com/vishvananda/netlink/nl"
11
-	"github.com/vishvananda/netns"
12
-	"golang.org/x/sys/unix"
13
-)
14
-
15
-const (
16
-	netlinkRecvSocketsTimeout = 3 * time.Second
17
-	netlinkSendSocketTimeout  = 30 * time.Second
18
-)
19
-
20
-// Service defines an IPVS service in its entirety.
21
-type Service struct {
22
-	// Virtual service address.
23
-	Address  net.IP
24
-	Protocol uint16
25
-	Port     uint16
26
-	FWMark   uint32 // Firewall mark of the service.
27
-
28
-	// Virtual service options.
29
-	SchedName     string
30
-	Flags         uint32
31
-	Timeout       uint32
32
-	Netmask       uint32
33
-	AddressFamily uint16
34
-	PEName        string
35
-	Stats         SvcStats
36
-}
37
-
38
-// SvcStats defines an IPVS service statistics
39
-type SvcStats struct {
40
-	Connections uint32
41
-	PacketsIn   uint32
42
-	PacketsOut  uint32
43
-	BytesIn     uint64
44
-	BytesOut    uint64
45
-	CPS         uint32
46
-	BPSOut      uint32
47
-	PPSIn       uint32
48
-	PPSOut      uint32
49
-	BPSIn       uint32
50
-}
51
-
52
-// Destination defines an IPVS destination (real server) in its
53
-// entirety.
54
-type Destination struct {
55
-	Address             net.IP
56
-	Port                uint16
57
-	Weight              int
58
-	ConnectionFlags     uint32
59
-	AddressFamily       uint16
60
-	UpperThreshold      uint32
61
-	LowerThreshold      uint32
62
-	ActiveConnections   int
63
-	InactiveConnections int
64
-	Stats               DstStats
65
-}
66
-
67
-// DstStats defines IPVS destination (real server) statistics
68
-type DstStats SvcStats
69
-
70
-// Config defines IPVS timeout configuration
71
-type Config struct {
72
-	TimeoutTCP    time.Duration
73
-	TimeoutTCPFin time.Duration
74
-	TimeoutUDP    time.Duration
75
-}
76
-
77
-// Handle provides a namespace specific ipvs handle to program ipvs
78
-// rules.
79
-type Handle struct {
80
-	seq  uint32
81
-	sock *nl.NetlinkSocket
82
-}
83
-
84
-// New provides a new ipvs handle in the namespace pointed to by the
85
-// passed path. It will return a valid handle or an error in case an
86
-// error occurred while creating the handle.
87
-func New(path string) (*Handle, error) {
88
-	setup()
89
-
90
-	n := netns.None()
91
-	if path != "" {
92
-		var err error
93
-		n, err = netns.GetFromPath(path)
94
-		if err != nil {
95
-			return nil, err
96
-		}
97
-	}
98
-	defer n.Close()
99
-
100
-	sock, err := nl.GetNetlinkSocketAt(n, netns.None(), unix.NETLINK_GENERIC)
101
-	if err != nil {
102
-		return nil, err
103
-	}
104
-	// Add operation timeout to avoid deadlocks
105
-	tv := unix.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds())
106
-	if err := sock.SetSendTimeout(&tv); err != nil {
107
-		return nil, err
108
-	}
109
-	tv = unix.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds())
110
-	if err := sock.SetReceiveTimeout(&tv); err != nil {
111
-		return nil, err
112
-	}
113
-
114
-	return &Handle{sock: sock}, nil
115
-}
116
-
117
-// Close closes the ipvs handle. The handle is invalid after Close
118
-// returns.
119
-func (i *Handle) Close() {
120
-	if i.sock != nil {
121
-		i.sock.Close()
122
-	}
123
-}
124
-
125
-// NewService creates a new ipvs service in the passed handle.
126
-func (i *Handle) NewService(s *Service) error {
127
-	return i.doCmd(s, nil, ipvsCmdNewService)
128
-}
129
-
130
-// IsServicePresent queries for the ipvs service in the passed handle.
131
-func (i *Handle) IsServicePresent(s *Service) bool {
132
-	return nil == i.doCmd(s, nil, ipvsCmdGetService)
133
-}
134
-
135
-// UpdateService updates an already existing service in the passed
136
-// handle.
137
-func (i *Handle) UpdateService(s *Service) error {
138
-	return i.doCmd(s, nil, ipvsCmdSetService)
139
-}
140
-
141
-// DelService deletes an already existing service in the passed
142
-// handle.
143
-func (i *Handle) DelService(s *Service) error {
144
-	return i.doCmd(s, nil, ipvsCmdDelService)
145
-}
146
-
147
-// Flush deletes all existing services in the passed
148
-// handle.
149
-func (i *Handle) Flush() error {
150
-	_, err := i.doCmdWithoutAttr(ipvsCmdFlush)
151
-	return err
152
-}
153
-
154
-// NewDestination creates a new real server in the passed ipvs
155
-// service which should already be existing in the passed handle.
156
-func (i *Handle) NewDestination(s *Service, d *Destination) error {
157
-	return i.doCmd(s, d, ipvsCmdNewDest)
158
-}
159
-
160
-// UpdateDestination updates an already existing real server in the
161
-// passed ipvs service in the passed handle.
162
-func (i *Handle) UpdateDestination(s *Service, d *Destination) error {
163
-	return i.doCmd(s, d, ipvsCmdSetDest)
164
-}
165
-
166
-// DelDestination deletes an already existing real server in the
167
-// passed ipvs service in the passed handle.
168
-func (i *Handle) DelDestination(s *Service, d *Destination) error {
169
-	return i.doCmd(s, d, ipvsCmdDelDest)
170
-}
171
-
172
-// GetServices returns an array of services configured on the Node
173
-func (i *Handle) GetServices() ([]*Service, error) {
174
-	return i.doGetServicesCmd(nil)
175
-}
176
-
177
-// GetDestinations returns an array of Destinations configured for this Service
178
-func (i *Handle) GetDestinations(s *Service) ([]*Destination, error) {
179
-	return i.doGetDestinationsCmd(s, nil)
180
-}
181
-
182
-// GetService gets details of a specific IPVS services, useful in updating statisics etc.,
183
-func (i *Handle) GetService(s *Service) (*Service, error) {
184
-
185
-	res, err := i.doGetServicesCmd(s)
186
-	if err != nil {
187
-		return nil, err
188
-	}
189
-
190
-	// We are looking for exactly one service otherwise error out
191
-	if len(res) != 1 {
192
-		return nil, fmt.Errorf("Expected only one service obtained=%d", len(res))
193
-	}
194
-
195
-	return res[0], nil
196
-}
197
-
198
-// GetConfig returns the current timeout configuration
199
-func (i *Handle) GetConfig() (*Config, error) {
200
-	return i.doGetConfigCmd()
201
-}
202
-
203
-// SetConfig set the current timeout configuration. 0: no change
204
-func (i *Handle) SetConfig(c *Config) error {
205
-	return i.doSetConfigCmd(c)
206
-}
207 1
new file mode 100644
... ...
@@ -0,0 +1,203 @@
0
+package ipvs
1
+
2
+import (
3
+	"fmt"
4
+	"net"
5
+	"time"
6
+
7
+	"github.com/vishvananda/netlink/nl"
8
+	"github.com/vishvananda/netns"
9
+	"golang.org/x/sys/unix"
10
+)
11
+
12
+const (
13
+	netlinkRecvSocketsTimeout = 3 * time.Second
14
+	netlinkSendSocketTimeout  = 30 * time.Second
15
+)
16
+
17
+// Service defines an IPVS service in its entirety.
18
+type Service struct {
19
+	// Virtual service address.
20
+	Address  net.IP
21
+	Protocol uint16
22
+	Port     uint16
23
+	FWMark   uint32 // Firewall mark of the service.
24
+
25
+	// Virtual service options.
26
+	SchedName     string
27
+	Flags         uint32
28
+	Timeout       uint32
29
+	Netmask       uint32
30
+	AddressFamily uint16
31
+	PEName        string
32
+	Stats         SvcStats
33
+}
34
+
35
+// SvcStats defines an IPVS service statistics
36
+type SvcStats struct {
37
+	Connections uint32
38
+	PacketsIn   uint32
39
+	PacketsOut  uint32
40
+	BytesIn     uint64
41
+	BytesOut    uint64
42
+	CPS         uint32
43
+	BPSOut      uint32
44
+	PPSIn       uint32
45
+	PPSOut      uint32
46
+	BPSIn       uint32
47
+}
48
+
49
+// Destination defines an IPVS destination (real server) in its
50
+// entirety.
51
+type Destination struct {
52
+	Address             net.IP
53
+	Port                uint16
54
+	Weight              int
55
+	ConnectionFlags     uint32
56
+	AddressFamily       uint16
57
+	UpperThreshold      uint32
58
+	LowerThreshold      uint32
59
+	ActiveConnections   int
60
+	InactiveConnections int
61
+	Stats               DstStats
62
+}
63
+
64
+// DstStats defines IPVS destination (real server) statistics
65
+type DstStats SvcStats
66
+
67
+// Config defines IPVS timeout configuration
68
+type Config struct {
69
+	TimeoutTCP    time.Duration
70
+	TimeoutTCPFin time.Duration
71
+	TimeoutUDP    time.Duration
72
+}
73
+
74
+// Handle provides a namespace specific ipvs handle to program ipvs
75
+// rules.
76
+type Handle struct {
77
+	seq  uint32
78
+	sock *nl.NetlinkSocket
79
+}
80
+
81
+// New provides a new ipvs handle in the namespace pointed to by the
82
+// passed path. It will return a valid handle or an error in case an
83
+// error occurred while creating the handle.
84
+func New(path string) (*Handle, error) {
85
+	setup()
86
+
87
+	n := netns.None()
88
+	if path != "" {
89
+		var err error
90
+		n, err = netns.GetFromPath(path)
91
+		if err != nil {
92
+			return nil, err
93
+		}
94
+	}
95
+	defer n.Close()
96
+
97
+	sock, err := nl.GetNetlinkSocketAt(n, netns.None(), unix.NETLINK_GENERIC)
98
+	if err != nil {
99
+		return nil, err
100
+	}
101
+	// Add operation timeout to avoid deadlocks
102
+	tv := unix.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds())
103
+	if err := sock.SetSendTimeout(&tv); err != nil {
104
+		return nil, err
105
+	}
106
+	tv = unix.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds())
107
+	if err := sock.SetReceiveTimeout(&tv); err != nil {
108
+		return nil, err
109
+	}
110
+
111
+	return &Handle{sock: sock}, nil
112
+}
113
+
114
+// Close closes the ipvs handle. The handle is invalid after Close
115
+// returns.
116
+func (i *Handle) Close() {
117
+	if i.sock != nil {
118
+		i.sock.Close()
119
+	}
120
+}
121
+
122
+// NewService creates a new ipvs service in the passed handle.
123
+func (i *Handle) NewService(s *Service) error {
124
+	return i.doCmd(s, nil, ipvsCmdNewService)
125
+}
126
+
127
+// IsServicePresent queries for the ipvs service in the passed handle.
128
+func (i *Handle) IsServicePresent(s *Service) bool {
129
+	return nil == i.doCmd(s, nil, ipvsCmdGetService)
130
+}
131
+
132
+// UpdateService updates an already existing service in the passed
133
+// handle.
134
+func (i *Handle) UpdateService(s *Service) error {
135
+	return i.doCmd(s, nil, ipvsCmdSetService)
136
+}
137
+
138
+// DelService deletes an already existing service in the passed
139
+// handle.
140
+func (i *Handle) DelService(s *Service) error {
141
+	return i.doCmd(s, nil, ipvsCmdDelService)
142
+}
143
+
144
+// Flush deletes all existing services in the passed
145
+// handle.
146
+func (i *Handle) Flush() error {
147
+	_, err := i.doCmdWithoutAttr(ipvsCmdFlush)
148
+	return err
149
+}
150
+
151
+// NewDestination creates a new real server in the passed ipvs
152
+// service which should already be existing in the passed handle.
153
+func (i *Handle) NewDestination(s *Service, d *Destination) error {
154
+	return i.doCmd(s, d, ipvsCmdNewDest)
155
+}
156
+
157
+// UpdateDestination updates an already existing real server in the
158
+// passed ipvs service in the passed handle.
159
+func (i *Handle) UpdateDestination(s *Service, d *Destination) error {
160
+	return i.doCmd(s, d, ipvsCmdSetDest)
161
+}
162
+
163
+// DelDestination deletes an already existing real server in the
164
+// passed ipvs service in the passed handle.
165
+func (i *Handle) DelDestination(s *Service, d *Destination) error {
166
+	return i.doCmd(s, d, ipvsCmdDelDest)
167
+}
168
+
169
+// GetServices returns an array of services configured on the Node
170
+func (i *Handle) GetServices() ([]*Service, error) {
171
+	return i.doGetServicesCmd(nil)
172
+}
173
+
174
+// GetDestinations returns an array of Destinations configured for this Service
175
+func (i *Handle) GetDestinations(s *Service) ([]*Destination, error) {
176
+	return i.doGetDestinationsCmd(s, nil)
177
+}
178
+
179
+// GetService gets details of a specific IPVS services, useful in updating statisics etc.,
180
+func (i *Handle) GetService(s *Service) (*Service, error) {
181
+	res, err := i.doGetServicesCmd(s)
182
+	if err != nil {
183
+		return nil, err
184
+	}
185
+
186
+	// We are looking for exactly one service otherwise error out
187
+	if len(res) != 1 {
188
+		return nil, fmt.Errorf("Expected only one service obtained=%d", len(res))
189
+	}
190
+
191
+	return res[0], nil
192
+}
193
+
194
+// GetConfig returns the current timeout configuration
195
+func (i *Handle) GetConfig() (*Config, error) {
196
+	return i.doGetConfigCmd()
197
+}
198
+
199
+// SetConfig set the current timeout configuration. 0: no change
200
+func (i *Handle) SetConfig(c *Config) error {
201
+	return i.doSetConfigCmd(c)
202
+}
0 203
deleted file mode 100644
... ...
@@ -1,681 +0,0 @@
1
-// +build linux
2
-
3
-package ipvs
4
-
5
-import (
6
-	"bytes"
7
-	"encoding/binary"
8
-	"errors"
9
-	"fmt"
10
-	"net"
11
-	"os/exec"
12
-	"strings"
13
-	"sync"
14
-	"sync/atomic"
15
-	"syscall"
16
-	"time"
17
-	"unsafe"
18
-
19
-	"github.com/sirupsen/logrus"
20
-	"github.com/vishvananda/netlink/nl"
21
-	"github.com/vishvananda/netns"
22
-)
23
-
24
-// For Quick Reference IPVS related netlink message is described at the end of this file.
25
-var (
26
-	native     = nl.NativeEndian()
27
-	ipvsFamily int
28
-	ipvsOnce   sync.Once
29
-)
30
-
31
-type genlMsgHdr struct {
32
-	cmd      uint8
33
-	version  uint8
34
-	reserved uint16
35
-}
36
-
37
-type ipvsFlags struct {
38
-	flags uint32
39
-	mask  uint32
40
-}
41
-
42
-func deserializeGenlMsg(b []byte) (hdr *genlMsgHdr) {
43
-	return (*genlMsgHdr)(unsafe.Pointer(&b[0:unsafe.Sizeof(*hdr)][0]))
44
-}
45
-
46
-func (hdr *genlMsgHdr) Serialize() []byte {
47
-	return (*(*[unsafe.Sizeof(*hdr)]byte)(unsafe.Pointer(hdr)))[:]
48
-}
49
-
50
-func (hdr *genlMsgHdr) Len() int {
51
-	return int(unsafe.Sizeof(*hdr))
52
-}
53
-
54
-func (f *ipvsFlags) Serialize() []byte {
55
-	return (*(*[unsafe.Sizeof(*f)]byte)(unsafe.Pointer(f)))[:]
56
-}
57
-
58
-func (f *ipvsFlags) Len() int {
59
-	return int(unsafe.Sizeof(*f))
60
-}
61
-
62
-func setup() {
63
-	ipvsOnce.Do(func() {
64
-		var err error
65
-		if out, err := exec.Command("modprobe", "-va", "ip_vs").CombinedOutput(); err != nil {
66
-			logrus.Warnf("Running modprobe ip_vs failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
67
-		}
68
-
69
-		ipvsFamily, err = getIPVSFamily()
70
-		if err != nil {
71
-			logrus.Error("Could not get ipvs family information from the kernel. It is possible that ipvs is not enabled in your kernel. Native loadbalancing will not work until this is fixed.")
72
-		}
73
-	})
74
-}
75
-
76
-func fillService(s *Service) nl.NetlinkRequestData {
77
-	cmdAttr := nl.NewRtAttr(ipvsCmdAttrService, nil)
78
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddressFamily, nl.Uint16Attr(s.AddressFamily))
79
-	if s.FWMark != 0 {
80
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFWMark, nl.Uint32Attr(s.FWMark))
81
-	} else {
82
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrProtocol, nl.Uint16Attr(s.Protocol))
83
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddress, rawIPData(s.Address))
84
-
85
-		// Port needs to be in network byte order.
86
-		portBuf := new(bytes.Buffer)
87
-		binary.Write(portBuf, binary.BigEndian, s.Port)
88
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPort, portBuf.Bytes())
89
-	}
90
-
91
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrSchedName, nl.ZeroTerminated(s.SchedName))
92
-	if s.PEName != "" {
93
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPEName, nl.ZeroTerminated(s.PEName))
94
-	}
95
-	f := &ipvsFlags{
96
-		flags: s.Flags,
97
-		mask:  0xFFFFFFFF,
98
-	}
99
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFlags, f.Serialize())
100
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrTimeout, nl.Uint32Attr(s.Timeout))
101
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrNetmask, nl.Uint32Attr(s.Netmask))
102
-	return cmdAttr
103
-}
104
-
105
-func fillDestination(d *Destination) nl.NetlinkRequestData {
106
-	cmdAttr := nl.NewRtAttr(ipvsCmdAttrDest, nil)
107
-
108
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrAddress, rawIPData(d.Address))
109
-	// Port needs to be in network byte order.
110
-	portBuf := new(bytes.Buffer)
111
-	binary.Write(portBuf, binary.BigEndian, d.Port)
112
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrPort, portBuf.Bytes())
113
-
114
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrForwardingMethod, nl.Uint32Attr(d.ConnectionFlags&ConnectionFlagFwdMask))
115
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrWeight, nl.Uint32Attr(uint32(d.Weight)))
116
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrUpperThreshold, nl.Uint32Attr(d.UpperThreshold))
117
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrLowerThreshold, nl.Uint32Attr(d.LowerThreshold))
118
-
119
-	return cmdAttr
120
-}
121
-
122
-func (i *Handle) doCmdwithResponse(s *Service, d *Destination, cmd uint8) ([][]byte, error) {
123
-	req := newIPVSRequest(cmd)
124
-	req.Seq = atomic.AddUint32(&i.seq, 1)
125
-
126
-	if s == nil {
127
-		req.Flags |= syscall.NLM_F_DUMP                    //Flag to dump all messages
128
-		req.AddData(nl.NewRtAttr(ipvsCmdAttrService, nil)) //Add a dummy attribute
129
-	} else {
130
-		req.AddData(fillService(s))
131
-	}
132
-
133
-	if d == nil {
134
-		if cmd == ipvsCmdGetDest {
135
-			req.Flags |= syscall.NLM_F_DUMP
136
-		}
137
-
138
-	} else {
139
-		req.AddData(fillDestination(d))
140
-	}
141
-
142
-	res, err := execute(i.sock, req, 0)
143
-	if err != nil {
144
-		return [][]byte{}, err
145
-	}
146
-
147
-	return res, nil
148
-}
149
-
150
-func (i *Handle) doCmd(s *Service, d *Destination, cmd uint8) error {
151
-	_, err := i.doCmdwithResponse(s, d, cmd)
152
-
153
-	return err
154
-}
155
-
156
-func getIPVSFamily() (int, error) {
157
-	sock, err := nl.GetNetlinkSocketAt(netns.None(), netns.None(), syscall.NETLINK_GENERIC)
158
-	if err != nil {
159
-		return 0, err
160
-	}
161
-	defer sock.Close()
162
-
163
-	req := newGenlRequest(genlCtrlID, genlCtrlCmdGetFamily)
164
-	req.AddData(nl.NewRtAttr(genlCtrlAttrFamilyName, nl.ZeroTerminated("IPVS")))
165
-
166
-	msgs, err := execute(sock, req, 0)
167
-	if err != nil {
168
-		return 0, err
169
-	}
170
-
171
-	for _, m := range msgs {
172
-		hdr := deserializeGenlMsg(m)
173
-		attrs, err := nl.ParseRouteAttr(m[hdr.Len():])
174
-		if err != nil {
175
-			return 0, err
176
-		}
177
-
178
-		for _, attr := range attrs {
179
-			switch int(attr.Attr.Type) {
180
-			case genlCtrlAttrFamilyID:
181
-				return int(native.Uint16(attr.Value[0:2])), nil
182
-			}
183
-		}
184
-	}
185
-
186
-	return 0, fmt.Errorf("no family id in the netlink response")
187
-}
188
-
189
-func rawIPData(ip net.IP) []byte {
190
-	family := nl.GetIPFamily(ip)
191
-	if family == nl.FAMILY_V4 {
192
-		return ip.To4()
193
-	}
194
-	return ip
195
-}
196
-
197
-func newIPVSRequest(cmd uint8) *nl.NetlinkRequest {
198
-	return newGenlRequest(ipvsFamily, cmd)
199
-}
200
-
201
-func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest {
202
-	req := nl.NewNetlinkRequest(familyID, syscall.NLM_F_ACK)
203
-	req.AddData(&genlMsgHdr{cmd: cmd, version: 1})
204
-	return req
205
-}
206
-
207
-func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) {
208
-	if err := s.Send(req); err != nil {
209
-		return nil, err
210
-	}
211
-
212
-	pid, err := s.GetPid()
213
-	if err != nil {
214
-		return nil, err
215
-	}
216
-
217
-	var res [][]byte
218
-
219
-done:
220
-	for {
221
-		msgs, _, err := s.Receive()
222
-		if err != nil {
223
-			if s.GetFd() == -1 {
224
-				return nil, fmt.Errorf("Socket got closed on receive")
225
-			}
226
-			if err == syscall.EAGAIN {
227
-				// timeout fired
228
-				continue
229
-			}
230
-			return nil, err
231
-		}
232
-		for _, m := range msgs {
233
-			if m.Header.Seq != req.Seq {
234
-				continue
235
-			}
236
-			if m.Header.Pid != pid {
237
-				return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
238
-			}
239
-			if m.Header.Type == syscall.NLMSG_DONE {
240
-				break done
241
-			}
242
-			if m.Header.Type == syscall.NLMSG_ERROR {
243
-				error := int32(native.Uint32(m.Data[0:4]))
244
-				if error == 0 {
245
-					break done
246
-				}
247
-				return nil, syscall.Errno(-error)
248
-			}
249
-			if resType != 0 && m.Header.Type != resType {
250
-				continue
251
-			}
252
-			res = append(res, m.Data)
253
-			if m.Header.Flags&syscall.NLM_F_MULTI == 0 {
254
-				break done
255
-			}
256
-		}
257
-	}
258
-	return res, nil
259
-}
260
-
261
-func parseIP(ip []byte, family uint16) (net.IP, error) {
262
-
263
-	var resIP net.IP
264
-
265
-	switch family {
266
-	case syscall.AF_INET:
267
-		resIP = (net.IP)(ip[:4])
268
-	case syscall.AF_INET6:
269
-		resIP = (net.IP)(ip[:16])
270
-	default:
271
-		return nil, fmt.Errorf("parseIP Error ip=%v", ip)
272
-
273
-	}
274
-	return resIP, nil
275
-}
276
-
277
-// parseStats
278
-func assembleStats(msg []byte) (SvcStats, error) {
279
-
280
-	var s SvcStats
281
-
282
-	attrs, err := nl.ParseRouteAttr(msg)
283
-	if err != nil {
284
-		return s, err
285
-	}
286
-
287
-	for _, attr := range attrs {
288
-		attrType := int(attr.Attr.Type)
289
-		switch attrType {
290
-		case ipvsStatsConns:
291
-			s.Connections = native.Uint32(attr.Value)
292
-		case ipvsStatsPktsIn:
293
-			s.PacketsIn = native.Uint32(attr.Value)
294
-		case ipvsStatsPktsOut:
295
-			s.PacketsOut = native.Uint32(attr.Value)
296
-		case ipvsStatsBytesIn:
297
-			s.BytesIn = native.Uint64(attr.Value)
298
-		case ipvsStatsBytesOut:
299
-			s.BytesOut = native.Uint64(attr.Value)
300
-		case ipvsStatsCPS:
301
-			s.CPS = native.Uint32(attr.Value)
302
-		case ipvsStatsPPSIn:
303
-			s.PPSIn = native.Uint32(attr.Value)
304
-		case ipvsStatsPPSOut:
305
-			s.PPSOut = native.Uint32(attr.Value)
306
-		case ipvsStatsBPSIn:
307
-			s.BPSIn = native.Uint32(attr.Value)
308
-		case ipvsStatsBPSOut:
309
-			s.BPSOut = native.Uint32(attr.Value)
310
-		}
311
-	}
312
-	return s, nil
313
-}
314
-
315
-// assembleService assembles a services back from a hain of netlink attributes
316
-func assembleService(attrs []syscall.NetlinkRouteAttr) (*Service, error) {
317
-
318
-	var s Service
319
-	var addressBytes []byte
320
-
321
-	for _, attr := range attrs {
322
-
323
-		attrType := int(attr.Attr.Type)
324
-
325
-		switch attrType {
326
-
327
-		case ipvsSvcAttrAddressFamily:
328
-			s.AddressFamily = native.Uint16(attr.Value)
329
-		case ipvsSvcAttrProtocol:
330
-			s.Protocol = native.Uint16(attr.Value)
331
-		case ipvsSvcAttrAddress:
332
-			addressBytes = attr.Value
333
-		case ipvsSvcAttrPort:
334
-			s.Port = binary.BigEndian.Uint16(attr.Value)
335
-		case ipvsSvcAttrFWMark:
336
-			s.FWMark = native.Uint32(attr.Value)
337
-		case ipvsSvcAttrSchedName:
338
-			s.SchedName = nl.BytesToString(attr.Value)
339
-		case ipvsSvcAttrFlags:
340
-			s.Flags = native.Uint32(attr.Value)
341
-		case ipvsSvcAttrTimeout:
342
-			s.Timeout = native.Uint32(attr.Value)
343
-		case ipvsSvcAttrNetmask:
344
-			s.Netmask = native.Uint32(attr.Value)
345
-		case ipvsSvcAttrStats:
346
-			stats, err := assembleStats(attr.Value)
347
-			if err != nil {
348
-				return nil, err
349
-			}
350
-			s.Stats = stats
351
-		}
352
-
353
-	}
354
-
355
-	// parse Address after parse AddressFamily incase of parseIP error
356
-	if addressBytes != nil {
357
-		ip, err := parseIP(addressBytes, s.AddressFamily)
358
-		if err != nil {
359
-			return nil, err
360
-		}
361
-		s.Address = ip
362
-	}
363
-
364
-	return &s, nil
365
-}
366
-
367
-// parseService given a ipvs netlink response this function will respond with a valid service entry, an error otherwise
368
-func (i *Handle) parseService(msg []byte) (*Service, error) {
369
-
370
-	var s *Service
371
-
372
-	//Remove General header for this message and parse the NetLink message
373
-	hdr := deserializeGenlMsg(msg)
374
-	NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
375
-	if err != nil {
376
-		return nil, err
377
-	}
378
-	if len(NetLinkAttrs) == 0 {
379
-		return nil, fmt.Errorf("error no valid netlink message found while parsing service record")
380
-	}
381
-
382
-	//Now Parse and get IPVS related attributes messages packed in this message.
383
-	ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value)
384
-	if err != nil {
385
-		return nil, err
386
-	}
387
-
388
-	//Assemble all the IPVS related attribute messages and create a service record
389
-	s, err = assembleService(ipvsAttrs)
390
-	if err != nil {
391
-		return nil, err
392
-	}
393
-
394
-	return s, nil
395
-}
396
-
397
-// doGetServicesCmd a wrapper which could be used commonly for both GetServices() and GetService(*Service)
398
-func (i *Handle) doGetServicesCmd(svc *Service) ([]*Service, error) {
399
-	var res []*Service
400
-
401
-	msgs, err := i.doCmdwithResponse(svc, nil, ipvsCmdGetService)
402
-	if err != nil {
403
-		return nil, err
404
-	}
405
-
406
-	for _, msg := range msgs {
407
-		srv, err := i.parseService(msg)
408
-		if err != nil {
409
-			return nil, err
410
-		}
411
-		res = append(res, srv)
412
-	}
413
-
414
-	return res, nil
415
-}
416
-
417
-// doCmdWithoutAttr a simple wrapper of netlink socket execute command
418
-func (i *Handle) doCmdWithoutAttr(cmd uint8) ([][]byte, error) {
419
-	req := newIPVSRequest(cmd)
420
-	req.Seq = atomic.AddUint32(&i.seq, 1)
421
-	return execute(i.sock, req, 0)
422
-}
423
-
424
-func assembleDestination(attrs []syscall.NetlinkRouteAttr) (*Destination, error) {
425
-
426
-	var d Destination
427
-	var addressBytes []byte
428
-
429
-	for _, attr := range attrs {
430
-
431
-		attrType := int(attr.Attr.Type)
432
-
433
-		switch attrType {
434
-
435
-		case ipvsDestAttrAddressFamily:
436
-			d.AddressFamily = native.Uint16(attr.Value)
437
-		case ipvsDestAttrAddress:
438
-			addressBytes = attr.Value
439
-		case ipvsDestAttrPort:
440
-			d.Port = binary.BigEndian.Uint16(attr.Value)
441
-		case ipvsDestAttrForwardingMethod:
442
-			d.ConnectionFlags = native.Uint32(attr.Value)
443
-		case ipvsDestAttrWeight:
444
-			d.Weight = int(native.Uint16(attr.Value))
445
-		case ipvsDestAttrUpperThreshold:
446
-			d.UpperThreshold = native.Uint32(attr.Value)
447
-		case ipvsDestAttrLowerThreshold:
448
-			d.LowerThreshold = native.Uint32(attr.Value)
449
-		case ipvsDestAttrActiveConnections:
450
-			d.ActiveConnections = int(native.Uint32(attr.Value))
451
-		case ipvsDestAttrInactiveConnections:
452
-			d.InactiveConnections = int(native.Uint32(attr.Value))
453
-		case ipvsDestAttrStats:
454
-			stats, err := assembleStats(attr.Value)
455
-			if err != nil {
456
-				return nil, err
457
-			}
458
-			d.Stats = DstStats(stats)
459
-		}
460
-	}
461
-
462
-	// in older kernels (< 3.18), the destination address family attribute doesn't exist so we must
463
-	// assume it based on the destination address provided.
464
-	if d.AddressFamily == 0 {
465
-		// we can't check the address family using net stdlib because netlink returns
466
-		// IPv4 addresses as the first 4 bytes in a []byte of length 16 where as
467
-		// stdlib expects it as the last 4 bytes.
468
-		addressFamily, err := getIPFamily(addressBytes)
469
-		if err != nil {
470
-			return nil, err
471
-		}
472
-		d.AddressFamily = addressFamily
473
-	}
474
-
475
-	// parse Address after parse AddressFamily incase of parseIP error
476
-	if addressBytes != nil {
477
-		ip, err := parseIP(addressBytes, d.AddressFamily)
478
-		if err != nil {
479
-			return nil, err
480
-		}
481
-		d.Address = ip
482
-	}
483
-
484
-	return &d, nil
485
-}
486
-
487
-// getIPFamily parses the IP family based on raw data from netlink.
488
-// For AF_INET, netlink will set the first 4 bytes with trailing zeros
489
-//   10.0.0.1 -> [10 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
490
-// For AF_INET6, the full 16 byte array is used:
491
-//   2001:db8:3c4d:15::1a00 -> [32 1 13 184 60 77 0 21 0 0 0 0 0 0 26 0]
492
-func getIPFamily(address []byte) (uint16, error) {
493
-	if len(address) == 4 {
494
-		return syscall.AF_INET, nil
495
-	}
496
-
497
-	if isZeros(address) {
498
-		return 0, errors.New("could not parse IP family from address data")
499
-	}
500
-
501
-	// assume IPv4 if first 4 bytes are non-zero but rest of the data is trailing zeros
502
-	if !isZeros(address[:4]) && isZeros(address[4:]) {
503
-		return syscall.AF_INET, nil
504
-	}
505
-
506
-	return syscall.AF_INET6, nil
507
-}
508
-
509
-func isZeros(b []byte) bool {
510
-	for i := 0; i < len(b); i++ {
511
-		if b[i] != 0 {
512
-			return false
513
-		}
514
-	}
515
-	return true
516
-}
517
-
518
-// parseDestination given a ipvs netlink response this function will respond with a valid destination entry, an error otherwise
519
-func (i *Handle) parseDestination(msg []byte) (*Destination, error) {
520
-	var dst *Destination
521
-
522
-	//Remove General header for this message
523
-	hdr := deserializeGenlMsg(msg)
524
-	NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
525
-	if err != nil {
526
-		return nil, err
527
-	}
528
-	if len(NetLinkAttrs) == 0 {
529
-		return nil, fmt.Errorf("error no valid netlink message found while parsing destination record")
530
-	}
531
-
532
-	//Now Parse and get IPVS related attributes messages packed in this message.
533
-	ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value)
534
-	if err != nil {
535
-		return nil, err
536
-	}
537
-
538
-	//Assemble netlink attributes and create a Destination record
539
-	dst, err = assembleDestination(ipvsAttrs)
540
-	if err != nil {
541
-		return nil, err
542
-	}
543
-
544
-	return dst, nil
545
-}
546
-
547
-// doGetDestinationsCmd a wrapper function to be used by GetDestinations and GetDestination(d) apis
548
-func (i *Handle) doGetDestinationsCmd(s *Service, d *Destination) ([]*Destination, error) {
549
-
550
-	var res []*Destination
551
-
552
-	msgs, err := i.doCmdwithResponse(s, d, ipvsCmdGetDest)
553
-	if err != nil {
554
-		return nil, err
555
-	}
556
-
557
-	for _, msg := range msgs {
558
-		dest, err := i.parseDestination(msg)
559
-		if err != nil {
560
-			return res, err
561
-		}
562
-		res = append(res, dest)
563
-	}
564
-	return res, nil
565
-}
566
-
567
-// parseConfig given a ipvs netlink response this function will respond with a valid config entry, an error otherwise
568
-func (i *Handle) parseConfig(msg []byte) (*Config, error) {
569
-	var c Config
570
-
571
-	//Remove General header for this message
572
-	hdr := deserializeGenlMsg(msg)
573
-	attrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
574
-	if err != nil {
575
-		return nil, err
576
-	}
577
-
578
-	for _, attr := range attrs {
579
-		attrType := int(attr.Attr.Type)
580
-		switch attrType {
581
-		case ipvsCmdAttrTimeoutTCP:
582
-			c.TimeoutTCP = time.Duration(native.Uint32(attr.Value)) * time.Second
583
-		case ipvsCmdAttrTimeoutTCPFin:
584
-			c.TimeoutTCPFin = time.Duration(native.Uint32(attr.Value)) * time.Second
585
-		case ipvsCmdAttrTimeoutUDP:
586
-			c.TimeoutUDP = time.Duration(native.Uint32(attr.Value)) * time.Second
587
-		}
588
-	}
589
-
590
-	return &c, nil
591
-}
592
-
593
-// doGetConfigCmd a wrapper function to be used by GetConfig
594
-func (i *Handle) doGetConfigCmd() (*Config, error) {
595
-	msg, err := i.doCmdWithoutAttr(ipvsCmdGetConfig)
596
-	if err != nil {
597
-		return nil, err
598
-	}
599
-
600
-	res, err := i.parseConfig(msg[0])
601
-	if err != nil {
602
-		return res, err
603
-	}
604
-	return res, nil
605
-}
606
-
607
-// doSetConfigCmd a wrapper function to be used by SetConfig
608
-func (i *Handle) doSetConfigCmd(c *Config) error {
609
-	req := newIPVSRequest(ipvsCmdSetConfig)
610
-	req.Seq = atomic.AddUint32(&i.seq, 1)
611
-
612
-	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCP, nl.Uint32Attr(uint32(c.TimeoutTCP.Seconds()))))
613
-	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCPFin, nl.Uint32Attr(uint32(c.TimeoutTCPFin.Seconds()))))
614
-	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutUDP, nl.Uint32Attr(uint32(c.TimeoutUDP.Seconds()))))
615
-
616
-	_, err := execute(i.sock, req, 0)
617
-
618
-	return err
619
-}
620
-
621
-// IPVS related netlink message format explained
622
-
623
-/* EACH NETLINK MSG is of the below format, this is what we will receive from execute() api.
624
-   If we have multiple netlink objects to process like GetServices() etc., execute() will
625
-   supply an array of this below object
626
-
627
-            NETLINK MSG
628
-|-----------------------------------|
629
-    0        1        2        3
630
-|--------|--------|--------|--------| -
631
-| CMD ID |  VER   |    RESERVED     | |==> General Message Header represented by genlMsgHdr
632
-|-----------------------------------| -
633
-|    ATTR LEN     |   ATTR TYPE     | |
634
-|-----------------------------------| |
635
-|                                   | |
636
-|              VALUE                | |
637
-|     []byte Array of IPVS MSG      | |==> Attribute Message represented by syscall.NetlinkRouteAttr
638
-|        PADDED BY 4 BYTES          | |
639
-|                                   | |
640
-|-----------------------------------| -
641
-
642
-
643
- Once We strip genlMsgHdr from above NETLINK MSG, we should parse the VALUE.
644
- VALUE will have an array of netlink attributes (syscall.NetlinkRouteAttr) such that each attribute will
645
- represent a "Service" or "Destination" object's field.  If we assemble these attributes we can construct
646
- Service or Destination.
647
-
648
-            IPVS MSG
649
-|-----------------------------------|
650
-     0        1        2        3
651
-|--------|--------|--------|--------|
652
-|    ATTR LEN     |    ATTR TYPE    |
653
-|-----------------------------------|
654
-|                                   |
655
-|                                   |
656
-| []byte IPVS ATTRIBUTE  BY 4 BYTES |
657
-|                                   |
658
-|                                   |
659
-|-----------------------------------|
660
-           NEXT ATTRIBUTE
661
-|-----------------------------------|
662
-|    ATTR LEN     |    ATTR TYPE    |
663
-|-----------------------------------|
664
-|                                   |
665
-|                                   |
666
-| []byte IPVS ATTRIBUTE  BY 4 BYTES |
667
-|                                   |
668
-|                                   |
669
-|-----------------------------------|
670
-           NEXT ATTRIBUTE
671
-|-----------------------------------|
672
-|    ATTR LEN     |    ATTR TYPE    |
673
-|-----------------------------------|
674
-|                                   |
675
-|                                   |
676
-| []byte IPVS ATTRIBUTE  BY 4 BYTES |
677
-|                                   |
678
-|                                   |
679
-|-----------------------------------|
680
-
681
-*/
682 1
new file mode 100644
... ...
@@ -0,0 +1,675 @@
0
+package ipvs
1
+
2
+import (
3
+	"bytes"
4
+	"encoding/binary"
5
+	"errors"
6
+	"fmt"
7
+	"net"
8
+	"os/exec"
9
+	"strings"
10
+	"sync"
11
+	"sync/atomic"
12
+	"syscall"
13
+	"time"
14
+	"unsafe"
15
+
16
+	"github.com/sirupsen/logrus"
17
+	"github.com/vishvananda/netlink/nl"
18
+	"github.com/vishvananda/netns"
19
+)
20
+
21
+// For Quick Reference IPVS related netlink message is described at the end of this file.
22
+var (
23
+	native     = nl.NativeEndian()
24
+	ipvsFamily int
25
+	ipvsOnce   sync.Once
26
+)
27
+
28
+type genlMsgHdr struct {
29
+	cmd      uint8
30
+	version  uint8
31
+	reserved uint16
32
+}
33
+
34
+type ipvsFlags struct {
35
+	flags uint32
36
+	mask  uint32
37
+}
38
+
39
+func deserializeGenlMsg(b []byte) (hdr *genlMsgHdr) {
40
+	return (*genlMsgHdr)(unsafe.Pointer(&b[0:unsafe.Sizeof(*hdr)][0]))
41
+}
42
+
43
+func (hdr *genlMsgHdr) Serialize() []byte {
44
+	return (*(*[unsafe.Sizeof(*hdr)]byte)(unsafe.Pointer(hdr)))[:]
45
+}
46
+
47
+func (hdr *genlMsgHdr) Len() int {
48
+	return int(unsafe.Sizeof(*hdr))
49
+}
50
+
51
+func (f *ipvsFlags) Serialize() []byte {
52
+	return (*(*[unsafe.Sizeof(*f)]byte)(unsafe.Pointer(f)))[:]
53
+}
54
+
55
+func (f *ipvsFlags) Len() int {
56
+	return int(unsafe.Sizeof(*f))
57
+}
58
+
59
+func setup() {
60
+	ipvsOnce.Do(func() {
61
+		var err error
62
+		if out, err := exec.Command("modprobe", "-va", "ip_vs").CombinedOutput(); err != nil {
63
+			logrus.Warnf("Running modprobe ip_vs failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
64
+		}
65
+
66
+		ipvsFamily, err = getIPVSFamily()
67
+		if err != nil {
68
+			logrus.Error("Could not get ipvs family information from the kernel. It is possible that ipvs is not enabled in your kernel. Native loadbalancing will not work until this is fixed.")
69
+		}
70
+	})
71
+}
72
+
73
+func fillService(s *Service) nl.NetlinkRequestData {
74
+	cmdAttr := nl.NewRtAttr(ipvsCmdAttrService, nil)
75
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddressFamily, nl.Uint16Attr(s.AddressFamily))
76
+	if s.FWMark != 0 {
77
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFWMark, nl.Uint32Attr(s.FWMark))
78
+	} else {
79
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrProtocol, nl.Uint16Attr(s.Protocol))
80
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddress, rawIPData(s.Address))
81
+
82
+		// Port needs to be in network byte order.
83
+		portBuf := new(bytes.Buffer)
84
+		binary.Write(portBuf, binary.BigEndian, s.Port)
85
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPort, portBuf.Bytes())
86
+	}
87
+
88
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrSchedName, nl.ZeroTerminated(s.SchedName))
89
+	if s.PEName != "" {
90
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPEName, nl.ZeroTerminated(s.PEName))
91
+	}
92
+	f := &ipvsFlags{
93
+		flags: s.Flags,
94
+		mask:  0xFFFFFFFF,
95
+	}
96
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFlags, f.Serialize())
97
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrTimeout, nl.Uint32Attr(s.Timeout))
98
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrNetmask, nl.Uint32Attr(s.Netmask))
99
+	return cmdAttr
100
+}
101
+
102
+func fillDestination(d *Destination) nl.NetlinkRequestData {
103
+	cmdAttr := nl.NewRtAttr(ipvsCmdAttrDest, nil)
104
+
105
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrAddress, rawIPData(d.Address))
106
+	// Port needs to be in network byte order.
107
+	portBuf := new(bytes.Buffer)
108
+	binary.Write(portBuf, binary.BigEndian, d.Port)
109
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrPort, portBuf.Bytes())
110
+
111
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrForwardingMethod, nl.Uint32Attr(d.ConnectionFlags&ConnectionFlagFwdMask))
112
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrWeight, nl.Uint32Attr(uint32(d.Weight)))
113
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrUpperThreshold, nl.Uint32Attr(d.UpperThreshold))
114
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrLowerThreshold, nl.Uint32Attr(d.LowerThreshold))
115
+
116
+	return cmdAttr
117
+}
118
+
119
+func (i *Handle) doCmdwithResponse(s *Service, d *Destination, cmd uint8) ([][]byte, error) {
120
+	req := newIPVSRequest(cmd)
121
+	req.Seq = atomic.AddUint32(&i.seq, 1)
122
+
123
+	if s == nil {
124
+		req.Flags |= syscall.NLM_F_DUMP                    // Flag to dump all messages
125
+		req.AddData(nl.NewRtAttr(ipvsCmdAttrService, nil)) // Add a dummy attribute
126
+	} else {
127
+		req.AddData(fillService(s))
128
+	}
129
+
130
+	if d == nil {
131
+		if cmd == ipvsCmdGetDest {
132
+			req.Flags |= syscall.NLM_F_DUMP
133
+		}
134
+	} else {
135
+		req.AddData(fillDestination(d))
136
+	}
137
+
138
+	res, err := execute(i.sock, req, 0)
139
+	if err != nil {
140
+		return [][]byte{}, err
141
+	}
142
+
143
+	return res, nil
144
+}
145
+
146
+func (i *Handle) doCmd(s *Service, d *Destination, cmd uint8) error {
147
+	_, err := i.doCmdwithResponse(s, d, cmd)
148
+
149
+	return err
150
+}
151
+
152
+func getIPVSFamily() (int, error) {
153
+	sock, err := nl.GetNetlinkSocketAt(netns.None(), netns.None(), syscall.NETLINK_GENERIC)
154
+	if err != nil {
155
+		return 0, err
156
+	}
157
+	defer sock.Close()
158
+
159
+	req := newGenlRequest(genlCtrlID, genlCtrlCmdGetFamily)
160
+	req.AddData(nl.NewRtAttr(genlCtrlAttrFamilyName, nl.ZeroTerminated("IPVS")))
161
+
162
+	msgs, err := execute(sock, req, 0)
163
+	if err != nil {
164
+		return 0, err
165
+	}
166
+
167
+	for _, m := range msgs {
168
+		hdr := deserializeGenlMsg(m)
169
+		attrs, err := nl.ParseRouteAttr(m[hdr.Len():])
170
+		if err != nil {
171
+			return 0, err
172
+		}
173
+
174
+		for _, attr := range attrs {
175
+			switch int(attr.Attr.Type) {
176
+			case genlCtrlAttrFamilyID:
177
+				return int(native.Uint16(attr.Value[0:2])), nil
178
+			}
179
+		}
180
+	}
181
+
182
+	return 0, fmt.Errorf("no family id in the netlink response")
183
+}
184
+
185
+func rawIPData(ip net.IP) []byte {
186
+	family := nl.GetIPFamily(ip)
187
+	if family == nl.FAMILY_V4 {
188
+		return ip.To4()
189
+	}
190
+	return ip
191
+}
192
+
193
+func newIPVSRequest(cmd uint8) *nl.NetlinkRequest {
194
+	return newGenlRequest(ipvsFamily, cmd)
195
+}
196
+
197
+func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest {
198
+	req := nl.NewNetlinkRequest(familyID, syscall.NLM_F_ACK)
199
+	req.AddData(&genlMsgHdr{cmd: cmd, version: 1})
200
+	return req
201
+}
202
+
203
+func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) {
204
+	if err := s.Send(req); err != nil {
205
+		return nil, err
206
+	}
207
+
208
+	pid, err := s.GetPid()
209
+	if err != nil {
210
+		return nil, err
211
+	}
212
+
213
+	var res [][]byte
214
+
215
+done:
216
+	for {
217
+		msgs, _, err := s.Receive()
218
+		if err != nil {
219
+			if s.GetFd() == -1 {
220
+				return nil, fmt.Errorf("Socket got closed on receive")
221
+			}
222
+			if err == syscall.EAGAIN {
223
+				// timeout fired
224
+				continue
225
+			}
226
+			return nil, err
227
+		}
228
+		for _, m := range msgs {
229
+			if m.Header.Seq != req.Seq {
230
+				continue
231
+			}
232
+			if m.Header.Pid != pid {
233
+				return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
234
+			}
235
+			if m.Header.Type == syscall.NLMSG_DONE {
236
+				break done
237
+			}
238
+			if m.Header.Type == syscall.NLMSG_ERROR {
239
+				error := int32(native.Uint32(m.Data[0:4]))
240
+				if error == 0 {
241
+					break done
242
+				}
243
+				return nil, syscall.Errno(-error)
244
+			}
245
+			if resType != 0 && m.Header.Type != resType {
246
+				continue
247
+			}
248
+			res = append(res, m.Data)
249
+			if m.Header.Flags&syscall.NLM_F_MULTI == 0 {
250
+				break done
251
+			}
252
+		}
253
+	}
254
+	return res, nil
255
+}
256
+
257
+func parseIP(ip []byte, family uint16) (net.IP, error) {
258
+	var resIP net.IP
259
+
260
+	switch family {
261
+	case syscall.AF_INET:
262
+		resIP = (net.IP)(ip[:4])
263
+	case syscall.AF_INET6:
264
+		resIP = (net.IP)(ip[:16])
265
+	default:
266
+		return nil, fmt.Errorf("parseIP Error ip=%v", ip)
267
+
268
+	}
269
+	return resIP, nil
270
+}
271
+
272
+// parseStats
273
+func assembleStats(msg []byte) (SvcStats, error) {
274
+	var s SvcStats
275
+
276
+	attrs, err := nl.ParseRouteAttr(msg)
277
+	if err != nil {
278
+		return s, err
279
+	}
280
+
281
+	for _, attr := range attrs {
282
+		attrType := int(attr.Attr.Type)
283
+		switch attrType {
284
+		case ipvsStatsConns:
285
+			s.Connections = native.Uint32(attr.Value)
286
+		case ipvsStatsPktsIn:
287
+			s.PacketsIn = native.Uint32(attr.Value)
288
+		case ipvsStatsPktsOut:
289
+			s.PacketsOut = native.Uint32(attr.Value)
290
+		case ipvsStatsBytesIn:
291
+			s.BytesIn = native.Uint64(attr.Value)
292
+		case ipvsStatsBytesOut:
293
+			s.BytesOut = native.Uint64(attr.Value)
294
+		case ipvsStatsCPS:
295
+			s.CPS = native.Uint32(attr.Value)
296
+		case ipvsStatsPPSIn:
297
+			s.PPSIn = native.Uint32(attr.Value)
298
+		case ipvsStatsPPSOut:
299
+			s.PPSOut = native.Uint32(attr.Value)
300
+		case ipvsStatsBPSIn:
301
+			s.BPSIn = native.Uint32(attr.Value)
302
+		case ipvsStatsBPSOut:
303
+			s.BPSOut = native.Uint32(attr.Value)
304
+		}
305
+	}
306
+	return s, nil
307
+}
308
+
309
+// assembleService assembles a services back from a hain of netlink attributes
310
+func assembleService(attrs []syscall.NetlinkRouteAttr) (*Service, error) {
311
+	var s Service
312
+	var addressBytes []byte
313
+
314
+	for _, attr := range attrs {
315
+
316
+		attrType := int(attr.Attr.Type)
317
+
318
+		switch attrType {
319
+
320
+		case ipvsSvcAttrAddressFamily:
321
+			s.AddressFamily = native.Uint16(attr.Value)
322
+		case ipvsSvcAttrProtocol:
323
+			s.Protocol = native.Uint16(attr.Value)
324
+		case ipvsSvcAttrAddress:
325
+			addressBytes = attr.Value
326
+		case ipvsSvcAttrPort:
327
+			s.Port = binary.BigEndian.Uint16(attr.Value)
328
+		case ipvsSvcAttrFWMark:
329
+			s.FWMark = native.Uint32(attr.Value)
330
+		case ipvsSvcAttrSchedName:
331
+			s.SchedName = nl.BytesToString(attr.Value)
332
+		case ipvsSvcAttrFlags:
333
+			s.Flags = native.Uint32(attr.Value)
334
+		case ipvsSvcAttrTimeout:
335
+			s.Timeout = native.Uint32(attr.Value)
336
+		case ipvsSvcAttrNetmask:
337
+			s.Netmask = native.Uint32(attr.Value)
338
+		case ipvsSvcAttrStats:
339
+			stats, err := assembleStats(attr.Value)
340
+			if err != nil {
341
+				return nil, err
342
+			}
343
+			s.Stats = stats
344
+		}
345
+
346
+	}
347
+
348
+	// parse Address after parse AddressFamily incase of parseIP error
349
+	if addressBytes != nil {
350
+		ip, err := parseIP(addressBytes, s.AddressFamily)
351
+		if err != nil {
352
+			return nil, err
353
+		}
354
+		s.Address = ip
355
+	}
356
+
357
+	return &s, nil
358
+}
359
+
360
+// parseService given a ipvs netlink response this function will respond with a valid service entry, an error otherwise
361
+func (i *Handle) parseService(msg []byte) (*Service, error) {
362
+	var s *Service
363
+
364
+	// Remove General header for this message and parse the NetLink message
365
+	hdr := deserializeGenlMsg(msg)
366
+	NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
367
+	if err != nil {
368
+		return nil, err
369
+	}
370
+	if len(NetLinkAttrs) == 0 {
371
+		return nil, fmt.Errorf("error no valid netlink message found while parsing service record")
372
+	}
373
+
374
+	// Now Parse and get IPVS related attributes messages packed in this message.
375
+	ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value)
376
+	if err != nil {
377
+		return nil, err
378
+	}
379
+
380
+	// Assemble all the IPVS related attribute messages and create a service record
381
+	s, err = assembleService(ipvsAttrs)
382
+	if err != nil {
383
+		return nil, err
384
+	}
385
+
386
+	return s, nil
387
+}
388
+
389
+// doGetServicesCmd a wrapper which could be used commonly for both GetServices() and GetService(*Service)
390
+func (i *Handle) doGetServicesCmd(svc *Service) ([]*Service, error) {
391
+	var res []*Service
392
+
393
+	msgs, err := i.doCmdwithResponse(svc, nil, ipvsCmdGetService)
394
+	if err != nil {
395
+		return nil, err
396
+	}
397
+
398
+	for _, msg := range msgs {
399
+		srv, err := i.parseService(msg)
400
+		if err != nil {
401
+			return nil, err
402
+		}
403
+		res = append(res, srv)
404
+	}
405
+
406
+	return res, nil
407
+}
408
+
409
+// doCmdWithoutAttr a simple wrapper of netlink socket execute command
410
+func (i *Handle) doCmdWithoutAttr(cmd uint8) ([][]byte, error) {
411
+	req := newIPVSRequest(cmd)
412
+	req.Seq = atomic.AddUint32(&i.seq, 1)
413
+	return execute(i.sock, req, 0)
414
+}
415
+
416
+func assembleDestination(attrs []syscall.NetlinkRouteAttr) (*Destination, error) {
417
+	var d Destination
418
+	var addressBytes []byte
419
+
420
+	for _, attr := range attrs {
421
+
422
+		attrType := int(attr.Attr.Type)
423
+
424
+		switch attrType {
425
+
426
+		case ipvsDestAttrAddressFamily:
427
+			d.AddressFamily = native.Uint16(attr.Value)
428
+		case ipvsDestAttrAddress:
429
+			addressBytes = attr.Value
430
+		case ipvsDestAttrPort:
431
+			d.Port = binary.BigEndian.Uint16(attr.Value)
432
+		case ipvsDestAttrForwardingMethod:
433
+			d.ConnectionFlags = native.Uint32(attr.Value)
434
+		case ipvsDestAttrWeight:
435
+			d.Weight = int(native.Uint16(attr.Value))
436
+		case ipvsDestAttrUpperThreshold:
437
+			d.UpperThreshold = native.Uint32(attr.Value)
438
+		case ipvsDestAttrLowerThreshold:
439
+			d.LowerThreshold = native.Uint32(attr.Value)
440
+		case ipvsDestAttrActiveConnections:
441
+			d.ActiveConnections = int(native.Uint32(attr.Value))
442
+		case ipvsDestAttrInactiveConnections:
443
+			d.InactiveConnections = int(native.Uint32(attr.Value))
444
+		case ipvsDestAttrStats:
445
+			stats, err := assembleStats(attr.Value)
446
+			if err != nil {
447
+				return nil, err
448
+			}
449
+			d.Stats = DstStats(stats)
450
+		}
451
+	}
452
+
453
+	// in older kernels (< 3.18), the destination address family attribute doesn't exist so we must
454
+	// assume it based on the destination address provided.
455
+	if d.AddressFamily == 0 {
456
+		// we can't check the address family using net stdlib because netlink returns
457
+		// IPv4 addresses as the first 4 bytes in a []byte of length 16 where as
458
+		// stdlib expects it as the last 4 bytes.
459
+		addressFamily, err := getIPFamily(addressBytes)
460
+		if err != nil {
461
+			return nil, err
462
+		}
463
+		d.AddressFamily = addressFamily
464
+	}
465
+
466
+	// parse Address after parse AddressFamily incase of parseIP error
467
+	if addressBytes != nil {
468
+		ip, err := parseIP(addressBytes, d.AddressFamily)
469
+		if err != nil {
470
+			return nil, err
471
+		}
472
+		d.Address = ip
473
+	}
474
+
475
+	return &d, nil
476
+}
477
+
478
+// getIPFamily parses the IP family based on raw data from netlink.
479
+// For AF_INET, netlink will set the first 4 bytes with trailing zeros
480
+//
481
+//	10.0.0.1 -> [10 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
482
+//
483
+// For AF_INET6, the full 16 byte array is used:
484
+//
485
+//	2001:db8:3c4d:15::1a00 -> [32 1 13 184 60 77 0 21 0 0 0 0 0 0 26 0]
486
+func getIPFamily(address []byte) (uint16, error) {
487
+	if len(address) == 4 {
488
+		return syscall.AF_INET, nil
489
+	}
490
+
491
+	if isZeros(address) {
492
+		return 0, errors.New("could not parse IP family from address data")
493
+	}
494
+
495
+	// assume IPv4 if first 4 bytes are non-zero but rest of the data is trailing zeros
496
+	if !isZeros(address[:4]) && isZeros(address[4:]) {
497
+		return syscall.AF_INET, nil
498
+	}
499
+
500
+	return syscall.AF_INET6, nil
501
+}
502
+
503
+func isZeros(b []byte) bool {
504
+	for i := 0; i < len(b); i++ {
505
+		if b[i] != 0 {
506
+			return false
507
+		}
508
+	}
509
+	return true
510
+}
511
+
512
+// parseDestination given a ipvs netlink response this function will respond with a valid destination entry, an error otherwise
513
+func (i *Handle) parseDestination(msg []byte) (*Destination, error) {
514
+	var dst *Destination
515
+
516
+	// Remove General header for this message
517
+	hdr := deserializeGenlMsg(msg)
518
+	NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
519
+	if err != nil {
520
+		return nil, err
521
+	}
522
+	if len(NetLinkAttrs) == 0 {
523
+		return nil, fmt.Errorf("error no valid netlink message found while parsing destination record")
524
+	}
525
+
526
+	// Now Parse and get IPVS related attributes messages packed in this message.
527
+	ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value)
528
+	if err != nil {
529
+		return nil, err
530
+	}
531
+
532
+	// Assemble netlink attributes and create a Destination record
533
+	dst, err = assembleDestination(ipvsAttrs)
534
+	if err != nil {
535
+		return nil, err
536
+	}
537
+
538
+	return dst, nil
539
+}
540
+
541
+// doGetDestinationsCmd a wrapper function to be used by GetDestinations and GetDestination(d) apis
542
+func (i *Handle) doGetDestinationsCmd(s *Service, d *Destination) ([]*Destination, error) {
543
+	var res []*Destination
544
+
545
+	msgs, err := i.doCmdwithResponse(s, d, ipvsCmdGetDest)
546
+	if err != nil {
547
+		return nil, err
548
+	}
549
+
550
+	for _, msg := range msgs {
551
+		dest, err := i.parseDestination(msg)
552
+		if err != nil {
553
+			return res, err
554
+		}
555
+		res = append(res, dest)
556
+	}
557
+	return res, nil
558
+}
559
+
560
+// parseConfig given a ipvs netlink response this function will respond with a valid config entry, an error otherwise
561
+func (i *Handle) parseConfig(msg []byte) (*Config, error) {
562
+	var c Config
563
+
564
+	// Remove General header for this message
565
+	hdr := deserializeGenlMsg(msg)
566
+	attrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
567
+	if err != nil {
568
+		return nil, err
569
+	}
570
+
571
+	for _, attr := range attrs {
572
+		attrType := int(attr.Attr.Type)
573
+		switch attrType {
574
+		case ipvsCmdAttrTimeoutTCP:
575
+			c.TimeoutTCP = time.Duration(native.Uint32(attr.Value)) * time.Second
576
+		case ipvsCmdAttrTimeoutTCPFin:
577
+			c.TimeoutTCPFin = time.Duration(native.Uint32(attr.Value)) * time.Second
578
+		case ipvsCmdAttrTimeoutUDP:
579
+			c.TimeoutUDP = time.Duration(native.Uint32(attr.Value)) * time.Second
580
+		}
581
+	}
582
+
583
+	return &c, nil
584
+}
585
+
586
+// doGetConfigCmd a wrapper function to be used by GetConfig
587
+func (i *Handle) doGetConfigCmd() (*Config, error) {
588
+	msg, err := i.doCmdWithoutAttr(ipvsCmdGetConfig)
589
+	if err != nil {
590
+		return nil, err
591
+	}
592
+
593
+	res, err := i.parseConfig(msg[0])
594
+	if err != nil {
595
+		return res, err
596
+	}
597
+	return res, nil
598
+}
599
+
600
+// doSetConfigCmd a wrapper function to be used by SetConfig
601
+func (i *Handle) doSetConfigCmd(c *Config) error {
602
+	req := newIPVSRequest(ipvsCmdSetConfig)
603
+	req.Seq = atomic.AddUint32(&i.seq, 1)
604
+
605
+	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCP, nl.Uint32Attr(uint32(c.TimeoutTCP.Seconds()))))
606
+	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCPFin, nl.Uint32Attr(uint32(c.TimeoutTCPFin.Seconds()))))
607
+	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutUDP, nl.Uint32Attr(uint32(c.TimeoutUDP.Seconds()))))
608
+
609
+	_, err := execute(i.sock, req, 0)
610
+
611
+	return err
612
+}
613
+
614
+// IPVS related netlink message format explained
615
+
616
+/* EACH NETLINK MSG is of the below format, this is what we will receive from execute() api.
617
+   If we have multiple netlink objects to process like GetServices() etc., execute() will
618
+   supply an array of this below object
619
+
620
+            NETLINK MSG
621
+|-----------------------------------|
622
+    0        1        2        3
623
+|--------|--------|--------|--------| -
624
+| CMD ID |  VER   |    RESERVED     | |==> General Message Header represented by genlMsgHdr
625
+|-----------------------------------| -
626
+|    ATTR LEN     |   ATTR TYPE     | |
627
+|-----------------------------------| |
628
+|                                   | |
629
+|              VALUE                | |
630
+|     []byte Array of IPVS MSG      | |==> Attribute Message represented by syscall.NetlinkRouteAttr
631
+|        PADDED BY 4 BYTES          | |
632
+|                                   | |
633
+|-----------------------------------| -
634
+
635
+
636
+ Once We strip genlMsgHdr from above NETLINK MSG, we should parse the VALUE.
637
+ VALUE will have an array of netlink attributes (syscall.NetlinkRouteAttr) such that each attribute will
638
+ represent a "Service" or "Destination" object's field.  If we assemble these attributes we can construct
639
+ Service or Destination.
640
+
641
+            IPVS MSG
642
+|-----------------------------------|
643
+     0        1        2        3
644
+|--------|--------|--------|--------|
645
+|    ATTR LEN     |    ATTR TYPE    |
646
+|-----------------------------------|
647
+|                                   |
648
+|                                   |
649
+| []byte IPVS ATTRIBUTE  BY 4 BYTES |
650
+|                                   |
651
+|                                   |
652
+|-----------------------------------|
653
+           NEXT ATTRIBUTE
654
+|-----------------------------------|
655
+|    ATTR LEN     |    ATTR TYPE    |
656
+|-----------------------------------|
657
+|                                   |
658
+|                                   |
659
+| []byte IPVS ATTRIBUTE  BY 4 BYTES |
660
+|                                   |
661
+|                                   |
662
+|-----------------------------------|
663
+           NEXT ATTRIBUTE
664
+|-----------------------------------|
665
+|    ATTR LEN     |    ATTR TYPE    |
666
+|-----------------------------------|
667
+|                                   |
668
+|                                   |
669
+| []byte IPVS ATTRIBUTE  BY 4 BYTES |
670
+|                                   |
671
+|                                   |
672
+|-----------------------------------|
673
+
674
+*/
... ...
@@ -640,8 +640,8 @@ github.com/moby/buildkit/util/urlutil
640 640
 github.com/moby/buildkit/util/winlayers
641 641
 github.com/moby/buildkit/version
642 642
 github.com/moby/buildkit/worker
643
-# github.com/moby/ipvs v1.0.2
644
-## explicit; go 1.13
643
+# github.com/moby/ipvs v1.1.0
644
+## explicit; go 1.17
645 645
 github.com/moby/ipvs
646 646
 # github.com/moby/locker v1.0.1
647 647
 ## explicit; go 1.13