Browse code

Vendoring libnetwork ed311d0

Signed-off-by: Alessandro Boch <aboch@docker.com>
(cherry picked from commit e26c513fa818a57b4ea572121e153cf6f056e1ee)

Alessandro Boch authored on 2016/06/23 01:40:20
Showing 14 changed files
... ...
@@ -65,7 +65,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837
65 65
 clone git github.com/imdario/mergo 0.2.1
66 66
 
67 67
 #get libnetwork packages
68
-clone git github.com/docker/libnetwork ab62dd6bf06bf0637d66d529931b69a5544468cb
68
+clone git github.com/docker/libnetwork  ed311d050fda7821f2e7c53a7e08a0205923aef5
69 69
 clone git github.com/docker/go-events 39718a26497694185f8fb58a7d6f31947f3dc42d
70 70
 clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
71 71
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
... ...
@@ -3,12 +3,10 @@ package libnetwork
3 3
 //go:generate protoc -I.:Godeps/_workspace/src/github.com/gogo/protobuf  --gogo_out=import_path=github.com/docker/libnetwork,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. agent.proto
4 4
 
5 5
 import (
6
-	"encoding/hex"
7 6
 	"fmt"
8 7
 	"net"
9 8
 	"os"
10 9
 	"sort"
11
-	"strconv"
12 10
 
13 11
 	"github.com/Sirupsen/logrus"
14 12
 	"github.com/docker/go-events"
... ...
@@ -20,6 +18,12 @@ import (
20 20
 	"github.com/gogo/protobuf/proto"
21 21
 )
22 22
 
23
+const (
24
+	subsysGossip = "networking:gossip"
25
+	subsysIPSec  = "networking:ipsec"
26
+	keyringSize  = 3
27
+)
28
+
23 29
 // ByTime implements sort.Interface for []*types.EncryptionKey based on
24 30
 // the LamportTime field.
25 31
 type ByTime []*types.EncryptionKey
... ...
@@ -80,6 +84,82 @@ func resolveAddr(addrOrInterface string) (string, error) {
80 80
 func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
81 81
 	drvEnc := discoverapi.DriverEncryptionUpdate{}
82 82
 
83
+	a := c.agent
84
+	// Find the deleted key. If the deleted key was the primary key,
85
+	// a new primary key should be set before removing if from keyring.
86
+	deleted := []byte{}
87
+	j := len(c.keys)
88
+	for i := 0; i < j; {
89
+		same := false
90
+		for _, key := range keys {
91
+			if same = key.LamportTime == c.keys[i].LamportTime; same {
92
+				break
93
+			}
94
+		}
95
+		if !same {
96
+			cKey := c.keys[i]
97
+			if cKey.Subsystem == subsysGossip {
98
+				deleted = cKey.Key
99
+			}
100
+
101
+			if cKey.Subsystem == subsysGossip /* subsysIPSec */ {
102
+				drvEnc.Prune = cKey.Key
103
+				drvEnc.PruneTag = cKey.LamportTime
104
+			}
105
+			c.keys[i], c.keys[j-1] = c.keys[j-1], c.keys[i]
106
+			c.keys[j-1] = nil
107
+			j--
108
+		}
109
+		i++
110
+	}
111
+	c.keys = c.keys[:j]
112
+
113
+	// Find the new key and add it to the key ring
114
+	for _, key := range keys {
115
+		same := false
116
+		for _, cKey := range c.keys {
117
+			if same = cKey.LamportTime == key.LamportTime; same {
118
+				break
119
+			}
120
+		}
121
+		if !same {
122
+			c.keys = append(c.keys, key)
123
+			if key.Subsystem == subsysGossip {
124
+				a.networkDB.SetKey(key.Key)
125
+			}
126
+
127
+			if key.Subsystem == subsysGossip /*subsysIPSec*/ {
128
+				drvEnc.Key = key.Key
129
+				drvEnc.Tag = key.LamportTime
130
+			}
131
+		}
132
+	}
133
+
134
+	key, tag := c.getPrimaryKeyTag(subsysGossip)
135
+	a.networkDB.SetPrimaryKey(key)
136
+
137
+	//key, tag = c.getPrimaryKeyTag(subsysIPSec)
138
+	drvEnc.Primary = key
139
+	drvEnc.PrimaryTag = tag
140
+
141
+	if len(deleted) > 0 {
142
+		a.networkDB.RemoveKey(deleted)
143
+	}
144
+
145
+	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
146
+		err := driver.DiscoverNew(discoverapi.EncryptionKeysUpdate, drvEnc)
147
+		if err != nil {
148
+			logrus.Warnf("Failed to update datapath keys in driver %s: %v", name, err)
149
+		}
150
+		return false
151
+	})
152
+
153
+	return nil
154
+}
155
+
156
+func (c *controller) handleKeyChangeV1(keys []*types.EncryptionKey) error {
157
+	drvEnc := discoverapi.DriverEncryptionUpdate{}
158
+
83 159
 	// Find the new key and add it to the key ring
84 160
 	a := c.agent
85 161
 	for _, key := range keys {
... ...
@@ -91,12 +171,12 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
91 91
 		}
92 92
 		if !same {
93 93
 			c.keys = append(c.keys, key)
94
-			if key.Subsystem == "networking:gossip" {
94
+			if key.Subsystem == subsysGossip {
95 95
 				a.networkDB.SetKey(key.Key)
96 96
 			}
97
-			if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
98
-				drvEnc.Key = hex.EncodeToString(key.Key)
99
-				drvEnc.Tag = strconv.FormatUint(key.LamportTime, 10)
97
+			if key.Subsystem == subsysGossip /*subsysIPSec*/ {
98
+				drvEnc.Key = key.Key
99
+				drvEnc.Tag = key.LamportTime
100 100
 			}
101 101
 			break
102 102
 		}
... ...
@@ -112,12 +192,12 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
112 112
 			}
113 113
 		}
114 114
 		if !same {
115
-			if cKey.Subsystem == "networking:gossip" {
115
+			if cKey.Subsystem == subsysGossip {
116 116
 				deleted = cKey.Key
117 117
 			}
118
-			if cKey.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
119
-				drvEnc.Prune = hex.EncodeToString(cKey.Key)
120
-				drvEnc.PruneTag = strconv.FormatUint(cKey.LamportTime, 10)
118
+			if cKey.Subsystem == subsysGossip /*subsysIPSec*/ {
119
+				drvEnc.Prune = cKey.Key
120
+				drvEnc.PruneTag = cKey.LamportTime
121 121
 			}
122 122
 			c.keys = append(c.keys[:i], c.keys[i+1:]...)
123 123
 			break
... ...
@@ -126,15 +206,15 @@ func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
126 126
 
127 127
 	sort.Sort(ByTime(c.keys))
128 128
 	for _, key := range c.keys {
129
-		if key.Subsystem == "networking:gossip" {
129
+		if key.Subsystem == subsysGossip {
130 130
 			a.networkDB.SetPrimaryKey(key.Key)
131 131
 			break
132 132
 		}
133 133
 	}
134 134
 	for _, key := range c.keys {
135
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
136
-			drvEnc.Primary = hex.EncodeToString(key.Key)
137
-			drvEnc.PrimaryTag = strconv.FormatUint(key.LamportTime, 10)
135
+		if key.Subsystem == subsysGossip /*subsysIPSec*/ {
136
+			drvEnc.Primary = key.Key
137
+			drvEnc.PrimaryTag = key.LamportTime
138 138
 			break
139 139
 		}
140 140
 	}
... ...
@@ -197,26 +277,51 @@ func (c *controller) agentSetup() error {
197 197
 	return nil
198 198
 }
199 199
 
200
-func (c *controller) agentInit(bindAddrOrInterface string) error {
201
-	if !c.isAgent() {
202
-		return nil
200
+// For a given subsystem getKeys sorts the keys by lamport time and returns
201
+// slice of keys and lamport time which can used as a unique tag for the keys
202
+func (c *controller) getKeys(subsys string) ([][]byte, []uint64) {
203
+	sort.Sort(ByTime(c.keys))
204
+
205
+	keys := [][]byte{}
206
+	tags := []uint64{}
207
+	for _, key := range c.keys {
208
+		if key.Subsystem == subsys {
209
+			keys = append(keys, key.Key)
210
+			tags = append(tags, key.LamportTime)
211
+		}
203 212
 	}
204 213
 
205
-	drvEnc := discoverapi.DriverEncryptionConfig{}
214
+	if len(keys) < keyringSize {
215
+		return keys, tags
216
+	}
217
+	keys[0], keys[1] = keys[1], keys[0]
218
+	tags[0], tags[1] = tags[1], tags[0]
219
+	return keys, tags
220
+}
206 221
 
207
-	// sort the keys by lamport time
222
+// getPrimaryKeyTag returns the primary key for a given subsytem from the
223
+// list of sorted key and the associated tag
224
+func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64) {
208 225
 	sort.Sort(ByTime(c.keys))
209
-
210
-	gossipkey := [][]byte{}
226
+	keys := []*types.EncryptionKey{}
211 227
 	for _, key := range c.keys {
212
-		if key.Subsystem == "networking:gossip" {
213
-			gossipkey = append(gossipkey, key.Key)
214
-		}
215
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
216
-			drvEnc.Keys = append(drvEnc.Keys, hex.EncodeToString(key.Key))
217
-			drvEnc.Tags = append(drvEnc.Tags, strconv.FormatUint(key.LamportTime, 10))
228
+		if key.Subsystem == subsys {
229
+			keys = append(keys, key)
218 230
 		}
219 231
 	}
232
+	return keys[1].Key, keys[1].LamportTime
233
+}
234
+
235
+func (c *controller) agentInit(bindAddrOrInterface string) error {
236
+	if !c.isAgent() {
237
+		return nil
238
+	}
239
+
240
+	drvEnc := discoverapi.DriverEncryptionConfig{}
241
+
242
+	keys, tags := c.getKeys(subsysGossip) // getKeys(subsysIPSec)
243
+	drvEnc.Keys = keys
244
+	drvEnc.Tags = tags
220 245
 
221 246
 	bindAddr, err := resolveAddr(bindAddrOrInterface)
222 247
 	if err != nil {
... ...
@@ -227,7 +332,7 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
227 227
 	nDB, err := networkdb.New(&networkdb.Config{
228 228
 		BindAddr: bindAddr,
229 229
 		NodeName: hostname,
230
-		Keys:     gossipkey,
230
+		Keys:     keys,
231 231
 	})
232 232
 
233 233
 	if err != nil {
... ...
@@ -275,12 +380,10 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) {
275 275
 	})
276 276
 
277 277
 	drvEnc := discoverapi.DriverEncryptionConfig{}
278
-	for _, key := range c.keys {
279
-		if key.Subsystem == "networking:gossip" /*"networking:ipsec"*/ {
280
-			drvEnc.Keys = append(drvEnc.Keys, hex.EncodeToString(key.Key))
281
-			drvEnc.Tags = append(drvEnc.Tags, strconv.FormatUint(key.LamportTime, 10))
282
-		}
283
-	}
278
+	keys, tags := c.getKeys(subsysGossip) // getKeys(subsysIPSec)
279
+	drvEnc.Keys = keys
280
+	drvEnc.Tags = tags
281
+
284 282
 	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
285 283
 		err := driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc)
286 284
 		if err != nil {
... ...
@@ -264,6 +264,9 @@ func (c *controller) SetKeys(keys []*types.EncryptionKey) error {
264 264
 		c.Unlock()
265 265
 		return nil
266 266
 	}
267
+	if len(keys) < keyringSize {
268
+		return c.handleKeyChangeV1(keys)
269
+	}
267 270
 	return c.handleKeyChange(keys)
268 271
 }
269 272
 
... ...
@@ -42,18 +42,18 @@ type DatastoreConfigData struct {
42 42
 // Key in first position is the primary key, the one to be used in tx.
43 43
 // Original key and tag types are []byte and uint64
44 44
 type DriverEncryptionConfig struct {
45
-	Keys []string
46
-	Tags []string
45
+	Keys [][]byte
46
+	Tags []uint64
47 47
 }
48 48
 
49 49
 // DriverEncryptionUpdate carries an update to the encryption key(s) as:
50 50
 // a new key and/or set a primary key and/or a removal of an existing key.
51 51
 // Original key and tag types are []byte and uint64
52 52
 type DriverEncryptionUpdate struct {
53
-	Key        string
54
-	Tag        string
55
-	Primary    string
56
-	PrimaryTag string
57
-	Prune      string
58
-	PruneTag   string
53
+	Key        []byte
54
+	Tag        uint64
55
+	Primary    []byte
56
+	PrimaryTag uint64
57
+	Prune      []byte
58
+	PruneTag   uint64
59 59
 }
... ...
@@ -33,7 +33,10 @@ type key struct {
33 33
 }
34 34
 
35 35
 func (k *key) String() string {
36
-	return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
36
+	if k != nil {
37
+		return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
38
+	}
39
+	return ""
37 40
 }
38 41
 
39 42
 type spi struct {
... ...
@@ -557,23 +560,3 @@ func updateNodeKey(lIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx,
557 557
 
558 558
 	return spis
559 559
 }
560
-
561
-func parseEncryptionKey(value, tag string) (*key, error) {
562
-	var (
563
-		k   *key
564
-		err error
565
-	)
566
-	if value == "" {
567
-		return nil, nil
568
-	}
569
-	k = &key{}
570
-	if k.value, err = hex.DecodeString(value); err != nil {
571
-		return nil, types.BadRequestErrorf("failed to decode key (%s): %v", value, err)
572
-	}
573
-	t, err := strconv.ParseUint(tag, 10, 64)
574
-	if err != nil {
575
-		return nil, types.BadRequestErrorf("failed to decode tag (%s): %v", tag, err)
576
-	}
577
-	k.tag = uint32(t)
578
-	return k, nil
579
-}
... ...
@@ -3,6 +3,7 @@ package overlay
3 3
 import (
4 4
 	"fmt"
5 5
 	"net"
6
+	"syscall"
6 7
 
7 8
 	log "github.com/Sirupsen/logrus"
8 9
 	"github.com/docker/libnetwork/driverapi"
... ...
@@ -31,6 +32,12 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
31 31
 		return fmt.Errorf("cannot join secure network: encryption keys not present")
32 32
 	}
33 33
 
34
+	nlh := ns.NlHandle()
35
+
36
+	if n.secure && !nlh.SupportsNetlinkFamily(syscall.NETLINK_XFRM) {
37
+		return fmt.Errorf("cannot join secure network: required modules to install IPSEC rules are missing on host")
38
+	}
39
+
34 40
 	s := n.getSubnetforIP(ep.addr)
35 41
 	if s == nil {
36 42
 		return fmt.Errorf("could not find subnet for endpoint %s", eid)
... ...
@@ -65,8 +72,6 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
65 65
 		return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
66 66
 	}
67 67
 
68
-	nlh := ns.NlHandle()
69
-
70 68
 	// Set the container interface and its peer MTU to 1450 to allow
71 69
 	// for 50 bytes vxlan encap (inner eth header(14) + outer IP(20) +
72 70
 	// outer UDP(8) + vxlan header(8))
... ...
@@ -284,7 +284,7 @@ func populateVNITbl() {
284 284
 			}
285 285
 			defer ns.Close()
286 286
 
287
-			nlh, err := netlink.NewHandleAt(ns)
287
+			nlh, err := netlink.NewHandleAt(ns, syscall.NETLINK_ROUTE)
288 288
 			if err != nil {
289 289
 				logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
290 290
 				return nil
... ...
@@ -3,6 +3,7 @@ package overlay
3 3
 import (
4 4
 	"fmt"
5 5
 	"strings"
6
+	"syscall"
6 7
 
7 8
 	"github.com/Sirupsen/logrus"
8 9
 	"github.com/docker/libnetwork/netutils"
... ...
@@ -128,7 +129,7 @@ func deleteVxlanByVNI(path string, vni uint32) error {
128 128
 		}
129 129
 		defer ns.Close()
130 130
 
131
-		nlh, err = netlink.NewHandleAt(ns)
131
+		nlh, err = netlink.NewHandleAt(ns, syscall.NETLINK_ROUTE)
132 132
 		if err != nil {
133 133
 			return fmt.Errorf("failed to get netlink handle for ns %s: %v", path, err)
134 134
 		}
... ...
@@ -306,9 +306,9 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
306 306
 		}
307 307
 		keys := make([]*key, 0, len(encrData.Keys))
308 308
 		for i := 0; i < len(encrData.Keys); i++ {
309
-			k, err := parseEncryptionKey(encrData.Keys[i], encrData.Tags[i])
310
-			if err != nil {
311
-				return err
309
+			k := &key{
310
+				value: encrData.Keys[i],
311
+				tag:   uint32(encrData.Tags[i]),
312 312
 			}
313 313
 			keys = append(keys, k)
314 314
 		}
... ...
@@ -319,17 +319,23 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
319 319
 		if !ok {
320 320
 			return fmt.Errorf("invalid encryption key notification data")
321 321
 		}
322
-		newKey, err = parseEncryptionKey(encrData.Key, encrData.Tag)
323
-		if err != nil {
324
-			return err
322
+		if encrData.Key != nil {
323
+			newKey = &key{
324
+				value: encrData.Key,
325
+				tag:   uint32(encrData.Tag),
326
+			}
325 327
 		}
326
-		priKey, err = parseEncryptionKey(encrData.Primary, encrData.PrimaryTag)
327
-		if err != nil {
328
-			return err
328
+		if encrData.Primary != nil {
329
+			priKey = &key{
330
+				value: encrData.Primary,
331
+				tag:   uint32(encrData.PrimaryTag),
332
+			}
329 333
 		}
330
-		delKey, err = parseEncryptionKey(encrData.Prune, encrData.PruneTag)
331
-		if err != nil {
332
-			return err
334
+		if encrData.Prune != nil {
335
+			delKey = &key{
336
+				value: encrData.Prune,
337
+				tag:   uint32(encrData.PruneTag),
338
+			}
333 339
 		}
334 340
 		d.updateKeys(newKey, priKey, delKey)
335 341
 	default:
... ...
@@ -3,6 +3,7 @@ package networkdb
3 3
 import (
4 4
 	"bytes"
5 5
 	"crypto/rand"
6
+	"encoding/hex"
6 7
 	"fmt"
7 8
 	"math/big"
8 9
 	rnd "math/rand"
... ...
@@ -36,6 +37,7 @@ func (l *logWriter) Write(p []byte) (int, error) {
36 36
 
37 37
 // SetKey adds a new key to the key ring
38 38
 func (nDB *NetworkDB) SetKey(key []byte) {
39
+	logrus.Debugf("Adding key %s", hex.EncodeToString(key)[0:5])
39 40
 	for _, dbKey := range nDB.config.Keys {
40 41
 		if bytes.Equal(key, dbKey) {
41 42
 			return
... ...
@@ -50,6 +52,7 @@ func (nDB *NetworkDB) SetKey(key []byte) {
50 50
 // SetPrimaryKey sets the given key as the primary key. This should have
51 51
 // been added apriori through SetKey
52 52
 func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
53
+	logrus.Debugf("Primary Key %s", hex.EncodeToString(key)[0:5])
53 54
 	for _, dbKey := range nDB.config.Keys {
54 55
 		if bytes.Equal(key, dbKey) {
55 56
 			if nDB.keyring != nil {
... ...
@@ -63,6 +66,7 @@ func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
63 63
 // RemoveKey removes a key from the key ring. The key being removed
64 64
 // can't be the primary key
65 65
 func (nDB *NetworkDB) RemoveKey(key []byte) {
66
+	logrus.Debugf("Remove Key %s", hex.EncodeToString(key)[0:5])
66 67
 	for i, dbKey := range nDB.config.Keys {
67 68
 		if bytes.Equal(key, dbKey) {
68 69
 			nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
... ...
@@ -90,6 +94,9 @@ func (nDB *NetworkDB) clusterInit() error {
90 90
 
91 91
 	var err error
92 92
 	if len(nDB.config.Keys) > 0 {
93
+		for i, key := range nDB.config.Keys {
94
+			logrus.Debugf("Encryption key %d: %s", i+1, hex.EncodeToString(key)[0:5])
95
+		}
93 96
 		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
94 97
 		if err != nil {
95 98
 			return err
... ...
@@ -3,6 +3,8 @@ package ns
3 3
 import (
4 4
 	"fmt"
5 5
 	"os"
6
+	"os/exec"
7
+	"strings"
6 8
 	"sync"
7 9
 	"syscall"
8 10
 
... ...
@@ -24,7 +26,7 @@ func Init() {
24 24
 	if err != nil {
25 25
 		log.Errorf("could not get initial namespace: %v", err)
26 26
 	}
27
-	initNl, err = netlink.NewHandle()
27
+	initNl, err = netlink.NewHandle(getSupportedNlFamilies()...)
28 28
 	if err != nil {
29 29
 		log.Errorf("could not create netlink handle on initial namespace: %v", err)
30 30
 	}
... ...
@@ -32,6 +34,7 @@ func Init() {
32 32
 
33 33
 // SetNamespace sets the initial namespace handler
34 34
 func SetNamespace() error {
35
+	initOnce.Do(Init)
35 36
 	if err := netns.Set(initNs); err != nil {
36 37
 		linkInfo, linkErr := getLink()
37 38
 		if linkErr != nil {
... ...
@@ -62,3 +65,22 @@ func NlHandle() *netlink.Handle {
62 62
 	initOnce.Do(Init)
63 63
 	return initNl
64 64
 }
65
+
66
+func getSupportedNlFamilies() []int {
67
+	fams := []int{syscall.NETLINK_ROUTE}
68
+	if err := loadXfrmModules(); err != nil {
69
+		log.Warnf("Could not load necessary modules for IPSEC rules: %v", err)
70
+		return fams
71
+	}
72
+	return append(fams, syscall.NETLINK_XFRM)
73
+}
74
+
75
+func loadXfrmModules() error {
76
+	if out, err := exec.Command("modprobe", "-va", "xfrm_user").CombinedOutput(); err != nil {
77
+		return fmt.Errorf("Running modprobe xfrm_user failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
78
+	}
79
+	if out, err := exec.Command("modprobe", "-va", "xfrm_algo").CombinedOutput(); err != nil {
80
+		return fmt.Errorf("Running modprobe xfrm_algo failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
81
+	}
82
+	return nil
83
+}
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"regexp"
7 7
 	"sync"
8 8
 	"syscall"
9
+	"time"
9 10
 
10 11
 	log "github.com/Sirupsen/logrus"
11 12
 	"github.com/docker/libnetwork/ns"
... ...
@@ -290,7 +291,12 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If
290 290
 	}
291 291
 
292 292
 	// Up the interface.
293
-	if err := nlh.LinkSetUp(iface); err != nil {
293
+	cnt := 0
294
+	for err = nlh.LinkSetUp(iface); err != nil && cnt < 3; cnt++ {
295
+		log.Debugf("retrying link setup because of: %v", err)
296
+		time.Sleep(10 * time.Millisecond)
297
+	}
298
+	if err != nil {
294 299
 		return fmt.Errorf("failed to set link up: %v", err)
295 300
 	}
296 301
 
... ...
@@ -30,7 +30,6 @@ var (
30 30
 	gpmWg            sync.WaitGroup
31 31
 	gpmCleanupPeriod = 60 * time.Second
32 32
 	gpmChan          = make(chan chan struct{})
33
-	nsOnce           sync.Once
34 33
 )
35 34
 
36 35
 // The networkNamespace type is the linux implementation of the Sandbox
... ...
@@ -196,7 +195,7 @@ func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
196 196
 	}
197 197
 	defer sboxNs.Close()
198 198
 
199
-	n.nlHandle, err = netlink.NewHandleAt(sboxNs)
199
+	n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
200 200
 	if err != nil {
201 201
 		return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
202 202
 	}
... ...
@@ -238,7 +237,7 @@ func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
238 238
 	}
239 239
 	defer sboxNs.Close()
240 240
 
241
-	n.nlHandle, err = netlink.NewHandleAt(sboxNs)
241
+	n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
242 242
 	if err != nil {
243 243
 		return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
244 244
 	}
... ...
@@ -326,7 +325,6 @@ func (n *networkNamespace) InvokeFunc(f func()) error {
326 326
 
327 327
 // InitOSContext initializes OS context while configuring network resources
328 328
 func InitOSContext() func() {
329
-	nsOnce.Do(ns.Init)
330 329
 	runtime.LockOSThread()
331 330
 	if err := ns.SetNamespace(); err != nil {
332 331
 		log.Error(err)
... ...
@@ -62,6 +62,21 @@ type extDNSEntry struct {
62 62
 	extOnce sync.Once
63 63
 }
64 64
 
65
+type sboxQuery struct {
66
+	sboxID string
67
+	dnsID  uint16
68
+}
69
+
70
+type clientConnGC struct {
71
+	toDelete bool
72
+	client   clientConn
73
+}
74
+
75
+var (
76
+	queryGCMutex sync.Mutex
77
+	queryGC      map[sboxQuery]*clientConnGC
78
+)
79
+
65 80
 // resolver implements the Resolver interface
66 81
 type resolver struct {
67 82
 	sb         *sandbox
... ...
@@ -79,6 +94,21 @@ type resolver struct {
79 79
 
80 80
 func init() {
81 81
 	rand.Seed(time.Now().Unix())
82
+	queryGC = make(map[sboxQuery]*clientConnGC)
83
+	go func() {
84
+		ticker := time.NewTicker(1 * time.Minute)
85
+		for range ticker.C {
86
+			queryGCMutex.Lock()
87
+			for query, conn := range queryGC {
88
+				if !conn.toDelete {
89
+					conn.toDelete = true
90
+					continue
91
+				}
92
+				delete(queryGC, query)
93
+			}
94
+			queryGCMutex.Unlock()
95
+		}
96
+	}()
82 97
 }
83 98
 
84 99
 // NewResolver creates a new instance of the Resolver
... ...
@@ -370,6 +400,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
370 370
 		writer = w
371 371
 	} else {
372 372
 		queryID := query.Id
373
+	extQueryLoop:
373 374
 		for i := 0; i < maxExtDNS; i++ {
374 375
 			extDNS := &r.extDNSList[i]
375 376
 			if extDNS.ipStr == "" {
... ...
@@ -435,14 +466,26 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
435 435
 				log.Debugf("Send to DNS server failed, %s", err)
436 436
 				continue
437 437
 			}
438
+			for {
439
+				// If a reply comes after a read timeout it will remain in the socket buffer
440
+				// and will be read after sending next query. To ignore such stale replies
441
+				// save the query context in a GC queue when read timesout. On the next reply
442
+				// if the context is present in the GC queue its a old reply. Ignore it and
443
+				// read again
444
+				resp, err = co.ReadMsg()
445
+				if err != nil {
446
+					if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
447
+						r.addQueryToGC(w, query)
448
+					}
449
+					r.forwardQueryEnd(w, query)
450
+					log.Debugf("Read from DNS server failed, %s", err)
451
+					continue extQueryLoop
452
+				}
438 453
 
439
-			resp, err = co.ReadMsg()
440
-			if err != nil {
441
-				r.forwardQueryEnd(w, query)
442
-				log.Debugf("Read from DNS server failed, %s", err)
443
-				continue
454
+				if !r.checkRespInGC(w, resp) {
455
+					break
456
+				}
444 457
 			}
445
-
446 458
 			// Retrieves the context for the forwarded query and returns the client connection
447 459
 			// to send the reply to
448 460
 			writer = r.forwardQueryEnd(w, resp)
... ...
@@ -501,6 +544,49 @@ func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg, queryID
501 501
 	return true
502 502
 }
503 503
 
504
+func (r *resolver) addQueryToGC(w dns.ResponseWriter, msg *dns.Msg) {
505
+	if w.LocalAddr().Network() != "udp" {
506
+		return
507
+	}
508
+
509
+	r.queryLock.Lock()
510
+	cc, ok := r.client[msg.Id]
511
+	r.queryLock.Unlock()
512
+	if !ok {
513
+		return
514
+	}
515
+
516
+	query := sboxQuery{
517
+		sboxID: r.sb.ID(),
518
+		dnsID:  msg.Id,
519
+	}
520
+	clientGC := &clientConnGC{
521
+		client: cc,
522
+	}
523
+	queryGCMutex.Lock()
524
+	queryGC[query] = clientGC
525
+	queryGCMutex.Unlock()
526
+}
527
+
528
+func (r *resolver) checkRespInGC(w dns.ResponseWriter, msg *dns.Msg) bool {
529
+	if w.LocalAddr().Network() != "udp" {
530
+		return false
531
+	}
532
+
533
+	query := sboxQuery{
534
+		sboxID: r.sb.ID(),
535
+		dnsID:  msg.Id,
536
+	}
537
+
538
+	queryGCMutex.Lock()
539
+	defer queryGCMutex.Unlock()
540
+	if _, ok := queryGC[query]; ok {
541
+		delete(queryGC, query)
542
+		return true
543
+	}
544
+	return false
545
+}
546
+
504 547
 func (r *resolver) forwardQueryEnd(w dns.ResponseWriter, msg *dns.Msg) dns.ResponseWriter {
505 548
 	var (
506 549
 		cc clientConn