Browse code

Make IP allocator lazy

Instead of allocating all possible IPs in advance, generate them as
needed.

A loop will cycle through all possible IPs in sequential order,
allocating them as needed and marking them as in use. Once the loop
exhausts all IPs, it will wrap back to the beginning. IPs that are
already in use will be skipped. When an IP is released, it will be
cleared and be available for allocation again.

Two goals drove this design:

1) Minimize memory footprint by only allocating IPs that are actually
in use

2) Minimize reuse of released IP addresses to avoid sending traffic to
the wrong containers

As a side effect, the functions for IP/Mask<->int conversion have been
rewritten so that they can never fail, which reduces the number of
error returns.

Fixes gh-231

Dominik Honnef authored on 2013/03/31 07:32:10
Showing 4 changed files
... ...
@@ -7,6 +7,7 @@ Caleb Spare <cespare@gmail.com>
7 7
 Charles Hooper <charles.hooper@dotcloud.com>
8 8
 Daniel Mizyrycki <daniel.mizyrycki@dotcloud.com>
9 9
 Daniel Robinson <gottagetmac@gmail.com>
10
+Dominik Honnef <dominik@honnef.co>
10 11
 Don Spaulding <donspauldingii@gmail.com>
11 12
 ezbercih <cem.ezberci@gmail.com>
12 13
 Frederick F. Kautz IV <fkautz@alumni.cmu.edu>
... ...
@@ -363,11 +363,10 @@ func (container *Container) allocateNetwork() error {
363 363
 	return nil
364 364
 }
365 365
 
366
-func (container *Container) releaseNetwork() error {
367
-	err := container.network.Release()
366
+func (container *Container) releaseNetwork()  {
367
+	container.network.Release()
368 368
 	container.network = nil
369 369
 	container.NetworkSettings = &NetworkSettings{}
370
-	return err
371 370
 }
372 371
 
373 372
 func (container *Container) monitor() {
... ...
@@ -382,9 +381,7 @@ func (container *Container) monitor() {
382 382
 	exitCode := container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
383 383
 
384 384
 	// Cleanup
385
-	if err := container.releaseNetwork(); err != nil {
386
-		log.Printf("%v: Failed to release network: %v", container.Id, err)
387
-	}
385
+	container.releaseNetwork()
388 386
 	if container.Config.OpenStdin {
389 387
 		if err := container.stdin.Close(); err != nil {
390 388
 			Debugf("%s: Error close stdin: %s", container.Id, err)
... ...
@@ -1,7 +1,6 @@
1 1
 package docker
2 2
 
3 3
 import (
4
-	"bytes"
5 4
 	"encoding/binary"
6 5
 	"errors"
7 6
 	"fmt"
... ...
@@ -30,40 +29,25 @@ func networkRange(network *net.IPNet) (net.IP, net.IP) {
30 30
 }
31 31
 
32 32
 // Converts a 4 bytes IP into a 32 bit integer
33
-func ipToInt(ip net.IP) (int32, error) {
34
-	buf := bytes.NewBuffer(ip.To4())
35
-	var n int32
36
-	if err := binary.Read(buf, binary.BigEndian, &n); err != nil {
37
-		return 0, err
38
-	}
39
-	return n, nil
33
+func ipToInt(ip net.IP) int32 {
34
+	return int32(binary.BigEndian.Uint32(ip.To4()))
40 35
 }
41 36
 
42 37
 // Converts 32 bit integer into a 4 bytes IP address
43
-func intToIp(n int32) (net.IP, error) {
44
-	var buf bytes.Buffer
45
-	if err := binary.Write(&buf, binary.BigEndian, &n); err != nil {
46
-		return net.IP{}, err
47
-	}
48
-	ip := net.IPv4(0, 0, 0, 0).To4()
49
-	for i := 0; i < net.IPv4len; i++ {
50
-		ip[i] = buf.Bytes()[i]
51
-	}
52
-	return ip, nil
38
+func intToIp(n int32) net.IP {
39
+	b := make([]byte, 4)
40
+	binary.BigEndian.PutUint32(b, uint32(n))
41
+	return net.IP(b)
53 42
 }
54 43
 
55 44
 // Given a netmask, calculates the number of available hosts
56
-func networkSize(mask net.IPMask) (int32, error) {
45
+func networkSize(mask net.IPMask) int32 {
57 46
 	m := net.IPv4Mask(0, 0, 0, 0)
58 47
 	for i := 0; i < net.IPv4len; i++ {
59 48
 		m[i] = ^mask[i]
60 49
 	}
61
-	buf := bytes.NewBuffer(m)
62
-	var n int32
63
-	if err := binary.Read(buf, binary.BigEndian, &n); err != nil {
64
-		return 0, err
65
-	}
66
-	return n + 1, nil
50
+
51
+	return int32(binary.BigEndian.Uint32(m)) + 1
67 52
 }
68 53
 
69 54
 // Wrapper around the iptables command
... ...
@@ -211,66 +195,97 @@ func newPortAllocator(start, end int) (*PortAllocator, error) {
211 211
 
212 212
 // IP allocator: Automatically allocate and release IP addresses
213 213
 type IPAllocator struct {
214
-	network *net.IPNet
215
-	queue   chan (net.IP)
214
+	network       *net.IPNet
215
+	queueAlloc    chan allocatedIP
216
+	queueReleased chan net.IP
217
+	inUse         map[int32]struct{}
218
+}
219
+
220
+type allocatedIP struct {
221
+	ip  net.IP
222
+	err error
216 223
 }
217 224
 
218
-func (alloc *IPAllocator) populate() error {
225
+func (alloc *IPAllocator) run() {
219 226
 	firstIP, _ := networkRange(alloc.network)
220
-	size, err := networkSize(alloc.network.Mask)
221
-	if err != nil {
222
-		return err
223
-	}
224
-	// The queue size should be the network size - 3
225
-	// -1 for the network address, -1 for the broadcast address and
226
-	// -1 for the gateway address
227
-	alloc.queue = make(chan net.IP, size-3)
228
-	for i := int32(1); i < size-1; i++ {
229
-		ipNum, err := ipToInt(firstIP)
230
-		if err != nil {
231
-			return err
227
+	ipNum := ipToInt(firstIP)
228
+	ownIP := ipToInt(alloc.network.IP)
229
+	size := networkSize(alloc.network.Mask)
230
+
231
+	pos := int32(1)
232
+	max := size - 2 // -1 for the broadcast address, -1 for the gateway address
233
+	for {
234
+		var (
235
+			newNum int32
236
+			inUse  bool
237
+		)
238
+
239
+		// Find first unused IP, give up after one whole round
240
+		for attempt := int32(0); attempt < max; attempt++ {
241
+			newNum = ipNum + pos
242
+
243
+			pos = pos%max + 1
244
+
245
+			// The network's IP is never okay to use
246
+			if newNum == ownIP {
247
+				continue
248
+			}
249
+
250
+			if _, inUse = alloc.inUse[newNum]; !inUse {
251
+				// We found an unused IP
252
+				break
253
+			}
232 254
 		}
233
-		ip, err := intToIp(ipNum + int32(i))
234
-		if err != nil {
235
-			return err
255
+
256
+		ip := allocatedIP{ip: intToIp(newNum)}
257
+		if inUse {
258
+			ip.err = errors.New("No unallocated IP available")
236 259
 		}
237
-		// Discard the network IP (that's the host IP address)
238
-		if ip.Equal(alloc.network.IP) {
239
-			continue
260
+
261
+		select {
262
+		case alloc.queueAlloc <- ip:
263
+			alloc.inUse[newNum] = struct{}{}
264
+		case released := <-alloc.queueReleased:
265
+			r := ipToInt(released)
266
+			delete(alloc.inUse, r)
267
+
268
+			if inUse {
269
+				// If we couldn't allocate a new IP, the released one
270
+				// will be the only free one now, so instantly use it
271
+				// next time
272
+				pos = r - ipNum
273
+			} else {
274
+				// Use same IP as last time
275
+				if pos == 1 {
276
+					pos = max
277
+				} else {
278
+					pos--
279
+				}
280
+			}
240 281
 		}
241
-		alloc.queue <- ip
242 282
 	}
243
-	return nil
244 283
 }
245 284
 
246 285
 func (alloc *IPAllocator) Acquire() (net.IP, error) {
247
-	select {
248
-	case ip := <-alloc.queue:
249
-		return ip, nil
250
-	default:
251
-		return net.IP{}, errors.New("No more IP addresses available")
252
-	}
253
-	return net.IP{}, nil
286
+	ip := <-alloc.queueAlloc
287
+	return ip.ip, ip.err
254 288
 }
255 289
 
256
-func (alloc *IPAllocator) Release(ip net.IP) error {
257
-	select {
258
-	case alloc.queue <- ip:
259
-		return nil
260
-	default:
261
-		return errors.New("Too many IP addresses have been released")
262
-	}
263
-	return nil
290
+func (alloc *IPAllocator) Release(ip net.IP) {
291
+	alloc.queueReleased <- ip
264 292
 }
265 293
 
266
-func newIPAllocator(network *net.IPNet) (*IPAllocator, error) {
294
+func newIPAllocator(network *net.IPNet) *IPAllocator {
267 295
 	alloc := &IPAllocator{
268
-		network: network,
296
+		network:       network,
297
+		queueAlloc:    make(chan allocatedIP),
298
+		queueReleased: make(chan net.IP),
299
+		inUse:         make(map[int32]struct{}),
269 300
 	}
270
-	if err := alloc.populate(); err != nil {
271
-		return nil, err
272
-	}
273
-	return alloc, nil
301
+
302
+	go alloc.run()
303
+
304
+	return alloc
274 305
 }
275 306
 
276 307
 // Network interface represents the networking stack of a container
... ...
@@ -297,7 +312,7 @@ func (iface *NetworkInterface) AllocatePort(port int) (int, error) {
297 297
 }
298 298
 
299 299
 // Release: Network cleanup - release all resources
300
-func (iface *NetworkInterface) Release() error {
300
+func (iface *NetworkInterface) Release() {
301 301
 	for _, port := range iface.extPorts {
302 302
 		if err := iface.manager.portMapper.Unmap(port); err != nil {
303 303
 			log.Printf("Unable to unmap port %v: %v", port, err)
... ...
@@ -307,7 +322,8 @@ func (iface *NetworkInterface) Release() error {
307 307
 		}
308 308
 
309 309
 	}
310
-	return iface.manager.ipAllocator.Release(iface.IPNet.IP)
310
+
311
+	iface.manager.ipAllocator.Release(iface.IPNet.IP)
311 312
 }
312 313
 
313 314
 // Network Manager manages a set of network interfaces
... ...
@@ -342,10 +358,7 @@ func newNetworkManager(bridgeIface string) (*NetworkManager, error) {
342 342
 	}
343 343
 	network := addr.(*net.IPNet)
344 344
 
345
-	ipAllocator, err := newIPAllocator(network)
346
-	if err != nil {
347
-		return nil, err
348
-	}
345
+	ipAllocator := newIPAllocator(network)
349 346
 
350 347
 	portAllocator, err := newPortAllocator(portRangeStart, portRangeEnd)
351 348
 	if err != nil {
... ...
@@ -28,8 +28,8 @@ func TestNetworkRange(t *testing.T) {
28 28
 	if !last.Equal(net.ParseIP("192.168.0.255")) {
29 29
 		t.Error(last.String())
30 30
 	}
31
-	if size, err := networkSize(network.Mask); err != nil || size != 256 {
32
-		t.Error(size, err)
31
+	if size := networkSize(network.Mask); size != 256 {
32
+		t.Error(size)
33 33
 	}
34 34
 
35 35
 	// Class A test
... ...
@@ -41,8 +41,8 @@ func TestNetworkRange(t *testing.T) {
41 41
 	if !last.Equal(net.ParseIP("10.255.255.255")) {
42 42
 		t.Error(last.String())
43 43
 	}
44
-	if size, err := networkSize(network.Mask); err != nil || size != 16777216 {
45
-		t.Error(size, err)
44
+	if size := networkSize(network.Mask); size != 16777216 {
45
+		t.Error(size)
46 46
 	}
47 47
 
48 48
 	// Class A, random IP address
... ...
@@ -64,8 +64,8 @@ func TestNetworkRange(t *testing.T) {
64 64
 	if !last.Equal(net.ParseIP("10.1.2.3")) {
65 65
 		t.Error(last.String())
66 66
 	}
67
-	if size, err := networkSize(network.Mask); err != nil || size != 1 {
68
-		t.Error(size, err)
67
+	if size := networkSize(network.Mask); size != 1 {
68
+		t.Error(size)
69 69
 	}
70 70
 
71 71
 	// 31bit mask
... ...
@@ -77,8 +77,8 @@ func TestNetworkRange(t *testing.T) {
77 77
 	if !last.Equal(net.ParseIP("10.1.2.3")) {
78 78
 		t.Error(last.String())
79 79
 	}
80
-	if size, err := networkSize(network.Mask); err != nil || size != 2 {
81
-		t.Error(size, err)
80
+	if size := networkSize(network.Mask); size != 2 {
81
+		t.Error(size)
82 82
 	}
83 83
 
84 84
 	// 26bit mask
... ...
@@ -90,54 +90,130 @@ func TestNetworkRange(t *testing.T) {
90 90
 	if !last.Equal(net.ParseIP("10.1.2.63")) {
91 91
 		t.Error(last.String())
92 92
 	}
93
-	if size, err := networkSize(network.Mask); err != nil || size != 64 {
94
-		t.Error(size, err)
93
+	if size := networkSize(network.Mask); size != 64 {
94
+		t.Error(size)
95 95
 	}
96 96
 }
97 97
 
98 98
 func TestConversion(t *testing.T) {
99 99
 	ip := net.ParseIP("127.0.0.1")
100
-	i, err := ipToInt(ip)
101
-	if err != nil {
102
-		t.Fatal(err)
103
-	}
100
+	i := ipToInt(ip)
104 101
 	if i == 0 {
105 102
 		t.Fatal("converted to zero")
106 103
 	}
107
-	conv, err := intToIp(i)
108
-	if err != nil {
109
-		t.Fatal(err)
110
-	}
104
+	conv := intToIp(i)
111 105
 	if !ip.Equal(conv) {
112 106
 		t.Error(conv.String())
113 107
 	}
114 108
 }
115 109
 
116 110
 func TestIPAllocator(t *testing.T) {
117
-	gwIP, n, _ := net.ParseCIDR("127.0.0.1/29")
118
-	alloc, err := newIPAllocator(&net.IPNet{IP: gwIP, Mask: n.Mask})
119
-	if err != nil {
120
-		t.Fatal(err)
111
+	expectedIPs := []net.IP{
112
+		0: net.IPv4(127, 0, 0, 2),
113
+		1: net.IPv4(127, 0, 0, 3),
114
+		2: net.IPv4(127, 0, 0, 4),
115
+		3: net.IPv4(127, 0, 0, 5),
116
+		4: net.IPv4(127, 0, 0, 6),
121 117
 	}
122
-	var lastIP net.IP
118
+
119
+	gwIP, n, _ := net.ParseCIDR("127.0.0.1/29")
120
+	alloc := newIPAllocator(&net.IPNet{IP: gwIP, Mask: n.Mask})
121
+	// Pool after initialisation (f = free, u = used)
122
+	// 2(f) - 3(f) - 4(f) - 5(f) - 6(f)
123
+	//  ↑
124
+
125
+	// Check that we get 5 IPs, from 127.0.0.2–127.0.0.6, in that
126
+	// order.
123 127
 	for i := 0; i < 5; i++ {
124 128
 		ip, err := alloc.Acquire()
125 129
 		if err != nil {
126 130
 			t.Fatal(err)
127 131
 		}
128
-		lastIP = ip
132
+
133
+		assertIPEquals(t, expectedIPs[i], ip)
129 134
 	}
130
-	ip, err := alloc.Acquire()
135
+	// Before loop begin
136
+	// 2(f) - 3(f) - 4(f) - 5(f) - 6(f)
137
+	//  ↑
138
+
139
+	// After i = 0
140
+	// 2(u) - 3(f) - 4(f) - 5(f) - 6(f)
141
+	//         ↑
142
+
143
+	// After i = 1
144
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
145
+	//                ↑
146
+
147
+	// After i = 2
148
+	// 2(u) - 3(u) - 4(u) - 5(f) - 6(f)
149
+	//                       ↑
150
+
151
+	// After i = 3
152
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(f)
153
+	//                              ↑
154
+
155
+	// After i = 4
156
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(u)
157
+	//  ↑
158
+
159
+	// Check that there are no more IPs
160
+	_, err := alloc.Acquire()
131 161
 	if err == nil {
132 162
 		t.Fatal("There shouldn't be any IP addresses at this point")
133 163
 	}
134
-	// Release 1 IP
135
-	alloc.Release(lastIP)
136
-	ip, err = alloc.Acquire()
137
-	if err != nil {
138
-		t.Fatal(err)
164
+
165
+	// Release some IPs in non-sequential order
166
+	alloc.Release(expectedIPs[3])
167
+	// 2(u) - 3(u) - 4(u) - 5(f) - 6(u)
168
+	//                       ↑
169
+
170
+	alloc.Release(expectedIPs[2])
171
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(u)
172
+	//                       ↑
173
+
174
+	alloc.Release(expectedIPs[4])
175
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
176
+	//                       ↑
177
+
178
+	// Make sure that IPs are reused in sequential order, starting
179
+	// with the first released IP
180
+	newIPs := make([]net.IP, 3)
181
+	for i := 0; i < 3; i++ {
182
+		ip, err := alloc.Acquire()
183
+		if err != nil {
184
+			t.Fatal(err)
185
+		}
186
+
187
+		newIPs[i] = ip
188
+	}
189
+	// Before loop begin
190
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
191
+	//                       ↑
192
+
193
+	// After i = 0
194
+	// 2(u) - 3(u) - 4(f) - 5(u) - 6(f)
195
+	//                              ↑
196
+
197
+	// After i = 1
198
+	// 2(u) - 3(u) - 4(f) - 5(u) - 6(u)
199
+	//                ↑
200
+
201
+	// After i = 2
202
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(u)
203
+	//                       ↑
204
+
205
+	assertIPEquals(t, expectedIPs[3], newIPs[0])
206
+	assertIPEquals(t, expectedIPs[4], newIPs[1])
207
+	assertIPEquals(t, expectedIPs[2], newIPs[2])
208
+
209
+	_, err = alloc.Acquire()
210
+	if err == nil {
211
+		t.Fatal("There shouldn't be any IP addresses at this point")
139 212
 	}
140
-	if !ip.Equal(lastIP) {
141
-		t.Fatal(ip.String())
213
+}
214
+
215
+func assertIPEquals(t *testing.T, ip1, ip2 net.IP) {
216
+	if !ip1.Equal(ip2) {
217
+		t.Fatalf("Expected IP %s, got %s", ip1, ip2)
142 218
 	}
143 219
 }