Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com>
| ... | ... |
@@ -65,7 +65,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837 |
| 65 | 65 |
clone git github.com/imdario/mergo 0.2.1 |
| 66 | 66 |
|
| 67 | 67 |
#get libnetwork packages |
| 68 |
-clone git github.com/docker/libnetwork 905d374c096ca1f3a9b75529e52518b7540179f3 |
|
| 68 |
+clone git github.com/docker/libnetwork 83ab4deaa2da3deb32cb5e64ceec43801dc17370 |
|
| 69 | 69 |
clone git github.com/docker/go-events afb2b9f2c23f33ada1a22b03651775fdc65a5089 |
| 70 | 70 |
clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80 |
| 71 | 71 |
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec |
| ... | ... |
@@ -75,7 +75,7 @@ clone git github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa07 |
| 75 | 75 |
clone git github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870 |
| 76 | 76 |
clone git github.com/docker/libkv v0.2.1 |
| 77 | 77 |
clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25 |
| 78 |
-clone git github.com/vishvananda/netlink 734d02c3e202f682c74b71314b2c61eec0170fd4 |
|
| 78 |
+clone git github.com/vishvananda/netlink e73bad418fd727ed3a02830b1af1ad0283a1de6c |
|
| 79 | 79 |
clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060 |
| 80 | 80 |
clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374 |
| 81 | 81 |
clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d |
| ... | ... |
@@ -35,6 +35,7 @@ func (b ByTime) Less(i, j int) bool { return b[i].LamportTime < b[j].LamportTime
|
| 35 | 35 |
type agent struct {
|
| 36 | 36 |
networkDB *networkdb.NetworkDB |
| 37 | 37 |
bindAddr string |
| 38 |
+ advertiseAddr string |
|
| 38 | 39 |
epTblCancel func() |
| 39 | 40 |
driverCancelFuncs map[string][]func() |
| 40 | 41 |
} |
| ... | ... |
@@ -236,25 +237,14 @@ func (c *controller) handleKeyChangeV1(keys []*types.EncryptionKey) error {
|
| 236 | 236 |
func (c *controller) agentSetup() error {
|
| 237 | 237 |
clusterProvider := c.cfg.Daemon.ClusterProvider |
| 238 | 238 |
|
| 239 |
- bindAddr, _, _ := net.SplitHostPort(clusterProvider.GetListenAddress()) |
|
| 239 |
+ bindAddr := clusterProvider.GetLocalAddress() |
|
| 240 |
+ advAddr := clusterProvider.GetAdvertiseAddress() |
|
| 240 | 241 |
remote := clusterProvider.GetRemoteAddress() |
| 241 | 242 |
remoteAddr, _, _ := net.SplitHostPort(remote) |
| 242 | 243 |
|
| 243 |
- // Determine the BindAddress from RemoteAddress or through best-effort routing |
|
| 244 |
- if !isValidClusteringIP(bindAddr) {
|
|
| 245 |
- if !isValidClusteringIP(remoteAddr) {
|
|
| 246 |
- remote = "8.8.8.8:53" |
|
| 247 |
- } |
|
| 248 |
- conn, err := net.Dial("udp", remote)
|
|
| 249 |
- if err == nil {
|
|
| 250 |
- bindHostPort := conn.LocalAddr().String() |
|
| 251 |
- bindAddr, _, _ = net.SplitHostPort(bindHostPort) |
|
| 252 |
- conn.Close() |
|
| 253 |
- } |
|
| 254 |
- } |
|
| 255 |
- |
|
| 256 |
- if bindAddr != "" && c.agent == nil {
|
|
| 257 |
- if err := c.agentInit(bindAddr); err != nil {
|
|
| 244 |
+ logrus.Infof("Initializing Libnetwork Agent Local-addr=%s Adv-addr=%s Remote-addr =%s", bindAddr, advAddr, remoteAddr)
|
|
| 245 |
+ if advAddr != "" && c.agent == nil {
|
|
| 246 |
+ if err := c.agentInit(bindAddr, advAddr); err != nil {
|
|
| 258 | 247 |
logrus.Errorf("Error in agentInit : %v", err)
|
| 259 | 248 |
} else {
|
| 260 | 249 |
c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
|
| ... | ... |
@@ -312,7 +302,7 @@ func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64) {
|
| 312 | 312 |
return keys[1].Key, keys[1].LamportTime |
| 313 | 313 |
} |
| 314 | 314 |
|
| 315 |
-func (c *controller) agentInit(bindAddrOrInterface string) error {
|
|
| 315 |
+func (c *controller) agentInit(bindAddrOrInterface, advertiseAddr string) error {
|
|
| 316 | 316 |
if !c.isAgent() {
|
| 317 | 317 |
return nil |
| 318 | 318 |
} |
| ... | ... |
@@ -325,9 +315,9 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
|
| 325 | 325 |
keys, tags := c.getKeys(subsysGossip) |
| 326 | 326 |
hostname, _ := os.Hostname() |
| 327 | 327 |
nDB, err := networkdb.New(&networkdb.Config{
|
| 328 |
- BindAddr: bindAddr, |
|
| 329 |
- NodeName: hostname, |
|
| 330 |
- Keys: keys, |
|
| 328 |
+ AdvertiseAddr: advertiseAddr, |
|
| 329 |
+ NodeName: hostname, |
|
| 330 |
+ Keys: keys, |
|
| 331 | 331 |
}) |
| 332 | 332 |
|
| 333 | 333 |
if err != nil {
|
| ... | ... |
@@ -339,6 +329,7 @@ func (c *controller) agentInit(bindAddrOrInterface string) error {
|
| 339 | 339 |
c.agent = &agent{
|
| 340 | 340 |
networkDB: nDB, |
| 341 | 341 |
bindAddr: bindAddr, |
| 342 |
+ advertiseAddr: advertiseAddr, |
|
| 342 | 343 |
epTblCancel: cancel, |
| 343 | 344 |
driverCancelFuncs: make(map[string][]func()), |
| 344 | 345 |
} |
| ... | ... |
@@ -377,8 +368,9 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) {
|
| 377 | 377 |
} |
| 378 | 378 |
|
| 379 | 379 |
d.DiscoverNew(discoverapi.NodeDiscovery, discoverapi.NodeDiscoveryData{
|
| 380 |
- Address: c.agent.bindAddr, |
|
| 381 |
- Self: true, |
|
| 380 |
+ Address: c.agent.advertiseAddr, |
|
| 381 |
+ BindAddress: c.agent.bindAddr, |
|
| 382 |
+ Self: true, |
|
| 382 | 383 |
}) |
| 383 | 384 |
|
| 384 | 385 |
drvEnc := discoverapi.DriverEncryptionConfig{}
|
| ... | ... |
@@ -11,6 +11,7 @@ import ( |
| 11 | 11 |
"github.com/docker/libnetwork/cluster" |
| 12 | 12 |
"github.com/docker/libnetwork/datastore" |
| 13 | 13 |
"github.com/docker/libnetwork/netlabel" |
| 14 |
+ "github.com/docker/libnetwork/osl" |
|
| 14 | 15 |
) |
| 15 | 16 |
|
| 16 | 17 |
// Config encapsulates configurations of various Libnetwork components |
| ... | ... |
@@ -197,6 +198,13 @@ func OptionDataDir(dataDir string) Option {
|
| 197 | 197 |
} |
| 198 | 198 |
} |
| 199 | 199 |
|
| 200 |
+// OptionExecRoot function returns an option setter for exec root folder |
|
| 201 |
+func OptionExecRoot(execRoot string) Option {
|
|
| 202 |
+ return func(c *Config) {
|
|
| 203 |
+ osl.SetBasePath(execRoot) |
|
| 204 |
+ } |
|
| 205 |
+} |
|
| 206 |
+ |
|
| 200 | 207 |
// ProcessOptions processes options and stores it in config |
| 201 | 208 |
func (c *Config) ProcessOptions(options ...Option) {
|
| 202 | 209 |
for _, opt := range options {
|
| ... | ... |
@@ -378,6 +378,10 @@ func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error {
|
| 378 | 378 |
return nil |
| 379 | 379 |
} |
| 380 | 380 |
|
| 381 |
+ c.Lock() |
|
| 382 |
+ c.cfg = cfg |
|
| 383 |
+ c.Unlock() |
|
| 384 |
+ |
|
| 381 | 385 |
var dsConfig *discoverapi.DatastoreConfigData |
| 382 | 386 |
for scope, sCfg := range cfg.Scopes {
|
| 383 | 387 |
if scope == datastore.LocalScope || !sCfg.IsValid() {
|
| ... | ... |
@@ -26,8 +26,9 @@ const ( |
| 26 | 26 |
|
| 27 | 27 |
// NodeDiscoveryData represents the structure backing the node discovery data json string |
| 28 | 28 |
type NodeDiscoveryData struct {
|
| 29 |
- Address string |
|
| 30 |
- Self bool |
|
| 29 |
+ Address string |
|
| 30 |
+ BindAddress string |
|
| 31 |
+ Self bool |
|
| 31 | 32 |
} |
| 32 | 33 |
|
| 33 | 34 |
// DatastoreConfigData is the data for the datastore update event message |
| ... | ... |
@@ -83,9 +83,9 @@ func (d *driver) populateEndpoints() error {
|
| 83 | 83 |
n, ok := d.networks[ep.nid] |
| 84 | 84 |
if !ok {
|
| 85 | 85 |
logrus.Debugf("Network (%s) not found for restored bridge endpoint (%s)", ep.nid[0:7], ep.id[0:7])
|
| 86 |
- logrus.Debugf("Deleting stale bridge endpoint (%s) from store", ep.nid[0:7])
|
|
| 86 |
+ logrus.Debugf("Deleting stale bridge endpoint (%s) from store", ep.id[0:7])
|
|
| 87 | 87 |
if err := d.storeDelete(ep); err != nil {
|
| 88 |
- logrus.Debugf("Failed to delete stale bridge endpoint (%s) from store", ep.nid[0:7])
|
|
| 88 |
+ logrus.Debugf("Failed to delete stale bridge endpoint (%s) from store", ep.id[0:7])
|
|
| 89 | 89 |
} |
| 90 | 90 |
continue |
| 91 | 91 |
} |
| ... | ... |
@@ -96,9 +96,9 @@ func (d *driver) populateEndpoints() error {
|
| 96 | 96 |
n, ok := d.networks[ep.nid] |
| 97 | 97 |
if !ok {
|
| 98 | 98 |
logrus.Debugf("Network (%s) not found for restored ipvlan endpoint (%s)", ep.nid[0:7], ep.id[0:7])
|
| 99 |
- logrus.Debugf("Deleting stale ipvlan endpoint (%s) from store", ep.nid[0:7])
|
|
| 99 |
+ logrus.Debugf("Deleting stale ipvlan endpoint (%s) from store", ep.id[0:7])
|
|
| 100 | 100 |
if err := d.storeDelete(ep); err != nil {
|
| 101 |
- logrus.Debugf("Failed to delete stale ipvlan endpoint (%s) from store", ep.nid[0:7])
|
|
| 101 |
+ logrus.Debugf("Failed to delete stale ipvlan endpoint (%s) from store", ep.id[0:7])
|
|
| 102 | 102 |
} |
| 103 | 103 |
continue |
| 104 | 104 |
} |
| ... | ... |
@@ -96,9 +96,9 @@ func (d *driver) populateEndpoints() error {
|
| 96 | 96 |
n, ok := d.networks[ep.nid] |
| 97 | 97 |
if !ok {
|
| 98 | 98 |
logrus.Debugf("Network (%s) not found for restored macvlan endpoint (%s)", ep.nid[0:7], ep.id[0:7])
|
| 99 |
- logrus.Debugf("Deleting stale macvlan endpoint (%s) from store", ep.nid[0:7])
|
|
| 99 |
+ logrus.Debugf("Deleting stale macvlan endpoint (%s) from store", ep.id[0:7])
|
|
| 100 | 100 |
if err := d.storeDelete(ep); err != nil {
|
| 101 |
- logrus.Debugf("Failed to delete stale macvlan endpoint (%s) from store", ep.nid[0:7])
|
|
| 101 |
+ logrus.Debugf("Failed to delete stale macvlan endpoint (%s) from store", ep.id[0:7])
|
|
| 102 | 102 |
} |
| 103 | 103 |
continue |
| 104 | 104 |
} |
| ... | ... |
@@ -2,23 +2,27 @@ package overlay |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"bytes" |
| 5 |
+ "encoding/binary" |
|
| 5 | 6 |
"encoding/hex" |
| 6 | 7 |
"fmt" |
| 8 |
+ "hash/fnv" |
|
| 7 | 9 |
"net" |
| 8 | 10 |
"sync" |
| 9 | 11 |
"syscall" |
| 10 | 12 |
|
| 13 |
+ "strconv" |
|
| 14 |
+ |
|
| 11 | 15 |
log "github.com/Sirupsen/logrus" |
| 12 | 16 |
"github.com/docker/libnetwork/iptables" |
| 13 | 17 |
"github.com/docker/libnetwork/ns" |
| 14 | 18 |
"github.com/docker/libnetwork/types" |
| 15 | 19 |
"github.com/vishvananda/netlink" |
| 16 |
- "strconv" |
|
| 17 | 20 |
) |
| 18 | 21 |
|
| 19 | 22 |
const ( |
| 20 |
- mark = uint32(0xD0C4E3) |
|
| 21 |
- timeout = 30 |
|
| 23 |
+ mark = uint32(0xD0C4E3) |
|
| 24 |
+ timeout = 30 |
|
| 25 |
+ pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8) |
|
| 22 | 26 |
) |
| 23 | 27 |
|
| 24 | 28 |
const ( |
| ... | ... |
@@ -85,6 +89,7 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal |
| 85 | 85 |
} |
| 86 | 86 |
|
| 87 | 87 |
lIP := types.GetMinimalIP(net.ParseIP(d.bindAddress)) |
| 88 |
+ aIP := types.GetMinimalIP(net.ParseIP(d.advertiseAddress)) |
|
| 88 | 89 |
nodes := map[string]net.IP{}
|
| 89 | 90 |
|
| 90 | 91 |
switch {
|
| ... | ... |
@@ -107,7 +112,7 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal |
| 107 | 107 |
|
| 108 | 108 |
if add {
|
| 109 | 109 |
for _, rIP := range nodes {
|
| 110 |
- if err := setupEncryption(lIP, rIP, vxlanID, d.secMap, d.keys); err != nil {
|
|
| 110 |
+ if err := setupEncryption(lIP, aIP, rIP, vxlanID, d.secMap, d.keys); err != nil {
|
|
| 111 | 111 |
log.Warnf("Failed to program network encryption between %s and %s: %v", lIP, rIP, err)
|
| 112 | 112 |
} |
| 113 | 113 |
} |
| ... | ... |
@@ -122,7 +127,7 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal |
| 122 | 122 |
return nil |
| 123 | 123 |
} |
| 124 | 124 |
|
| 125 |
-func setupEncryption(localIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error {
|
|
| 125 |
+func setupEncryption(localIP, advIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error {
|
|
| 126 | 126 |
log.Debugf("Programming encryption for vxlan %d between %s and %s", vni, localIP, remoteIP)
|
| 127 | 127 |
rIPs := remoteIP.String() |
| 128 | 128 |
|
| ... | ... |
@@ -134,7 +139,7 @@ func setupEncryption(localIP, remoteIP net.IP, vni uint32, em *encrMap, keys []* |
| 134 | 134 |
} |
| 135 | 135 |
|
| 136 | 136 |
for i, k := range keys {
|
| 137 |
- spis := &spi{buildSPI(localIP, remoteIP, k.tag), buildSPI(remoteIP, localIP, k.tag)}
|
|
| 137 |
+ spis := &spi{buildSPI(advIP, remoteIP, k.tag), buildSPI(remoteIP, advIP, k.tag)}
|
|
| 138 | 138 |
dir := reverse |
| 139 | 139 |
if i == 0 {
|
| 140 | 140 |
dir = bidir |
| ... | ... |
@@ -216,7 +221,6 @@ func programMangle(vni uint32, add bool) (err error) {
|
| 216 | 216 |
|
| 217 | 217 |
func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
|
| 218 | 218 |
var ( |
| 219 |
- crypt *netlink.XfrmStateAlgo |
|
| 220 | 219 |
action = "Removing" |
| 221 | 220 |
xfrmProgram = ns.NlHandle().XfrmStateDel |
| 222 | 221 |
) |
| ... | ... |
@@ -224,7 +228,6 @@ func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (f |
| 224 | 224 |
if add {
|
| 225 | 225 |
action = "Adding" |
| 226 | 226 |
xfrmProgram = ns.NlHandle().XfrmStateAdd |
| 227 |
- crypt = &netlink.XfrmStateAlgo{Name: "cbc(aes)", Key: k.value}
|
|
| 228 | 227 |
} |
| 229 | 228 |
|
| 230 | 229 |
if dir&reverse > 0 {
|
| ... | ... |
@@ -236,7 +239,7 @@ func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (f |
| 236 | 236 |
Mode: netlink.XFRM_MODE_TRANSPORT, |
| 237 | 237 |
} |
| 238 | 238 |
if add {
|
| 239 |
- rSA.Crypt = crypt |
|
| 239 |
+ rSA.Aead = buildAeadAlgo(k, spi.reverse) |
|
| 240 | 240 |
} |
| 241 | 241 |
|
| 242 | 242 |
exists, err := saExists(rSA) |
| ... | ... |
@@ -261,7 +264,7 @@ func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (f |
| 261 | 261 |
Mode: netlink.XFRM_MODE_TRANSPORT, |
| 262 | 262 |
} |
| 263 | 263 |
if add {
|
| 264 |
- fSA.Crypt = crypt |
|
| 264 |
+ fSA.Aead = buildAeadAlgo(k, spi.forward) |
|
| 265 | 265 |
} |
| 266 | 266 |
|
| 267 | 267 |
exists, err := saExists(fSA) |
| ... | ... |
@@ -354,13 +357,23 @@ func spExists(sp *netlink.XfrmPolicy) (bool, error) {
|
| 354 | 354 |
} |
| 355 | 355 |
|
| 356 | 356 |
func buildSPI(src, dst net.IP, st uint32) int {
|
| 357 |
- spi := int(st) |
|
| 358 |
- f := src[len(src)-4:] |
|
| 359 |
- t := dst[len(dst)-4:] |
|
| 360 |
- for i := 0; i < 4; i++ {
|
|
| 361 |
- spi = spi ^ (int(f[i])^int(t[3-i]))<<uint32(8*i) |
|
| 357 |
+ b := make([]byte, 4) |
|
| 358 |
+ binary.BigEndian.PutUint32(b, st) |
|
| 359 |
+ h := fnv.New32a() |
|
| 360 |
+ h.Write(src) |
|
| 361 |
+ h.Write(b) |
|
| 362 |
+ h.Write(dst) |
|
| 363 |
+ return int(binary.BigEndian.Uint32(h.Sum(nil))) |
|
| 364 |
+} |
|
| 365 |
+ |
|
| 366 |
+func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo {
|
|
| 367 |
+ salt := make([]byte, 4) |
|
| 368 |
+ binary.BigEndian.PutUint32(salt, uint32(s)) |
|
| 369 |
+ return &netlink.XfrmStateAlgo{
|
|
| 370 |
+ Name: "rfc4106(gcm(aes))", |
|
| 371 |
+ Key: append(k.value, salt...), |
|
| 372 |
+ ICVLen: 64, |
|
| 362 | 373 |
} |
| 363 |
- return spi |
|
| 364 | 374 |
} |
| 365 | 375 |
|
| 366 | 376 |
func (d *driver) secMapWalk(f func(string, []*spi) ([]*spi, bool)) error {
|
| ... | ... |
@@ -560,3 +573,14 @@ func updateNodeKey(lIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx, |
| 560 | 560 |
|
| 561 | 561 |
return spis |
| 562 | 562 |
} |
| 563 |
+ |
|
| 564 |
+func (n *network) maxMTU() int {
|
|
| 565 |
+ mtu := vxlanVethMTU |
|
| 566 |
+ if n.secure {
|
|
| 567 |
+ // In case of encryption account for the |
|
| 568 |
+ // esp packet expansion and padding |
|
| 569 |
+ mtu -= pktExpansion |
|
| 570 |
+ mtu -= (mtu % 4) |
|
| 571 |
+ } |
|
| 572 |
+ return mtu |
|
| 573 |
+} |
| ... | ... |
@@ -75,11 +75,13 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, |
| 75 | 75 |
// Set the container interface and its peer MTU to 1450 to allow |
| 76 | 76 |
// for 50 bytes vxlan encap (inner eth header(14) + outer IP(20) + |
| 77 | 77 |
// outer UDP(8) + vxlan header(8)) |
| 78 |
+ mtu := n.maxMTU() |
|
| 79 |
+ |
|
| 78 | 80 |
veth, err := nlh.LinkByName(overlayIfName) |
| 79 | 81 |
if err != nil {
|
| 80 | 82 |
return fmt.Errorf("cound not find link by name %s: %v", overlayIfName, err)
|
| 81 | 83 |
} |
| 82 |
- err = nlh.LinkSetMTU(veth, vxlanVethMTU) |
|
| 84 |
+ err = nlh.LinkSetMTU(veth, mtu) |
|
| 83 | 85 |
if err != nil {
|
| 84 | 86 |
return err |
| 85 | 87 |
} |
| ... | ... |
@@ -93,7 +95,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, |
| 93 | 93 |
if err != nil {
|
| 94 | 94 |
return fmt.Errorf("could not find link by name %s: %v", containerIfName, err)
|
| 95 | 95 |
} |
| 96 |
- err = nlh.LinkSetMTU(veth, vxlanVethMTU) |
|
| 96 |
+ err = nlh.LinkSetMTU(veth, mtu) |
|
| 97 | 97 |
if err != nil {
|
| 98 | 98 |
return err |
| 99 | 99 |
} |
| ... | ... |
@@ -119,7 +121,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, |
| 119 | 119 |
} |
| 120 | 120 |
|
| 121 | 121 |
d.peerDbAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, |
| 122 |
- net.ParseIP(d.bindAddress), true) |
|
| 122 |
+ net.ParseIP(d.advertiseAddress), true) |
|
| 123 | 123 |
|
| 124 | 124 |
if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
|
| 125 | 125 |
log.Warn(err) |
| ... | ... |
@@ -128,7 +130,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, |
| 128 | 128 |
buf, err := proto.Marshal(&PeerRecord{
|
| 129 | 129 |
EndpointIP: ep.addr.String(), |
| 130 | 130 |
EndpointMAC: ep.mac.String(), |
| 131 |
- TunnelEndpointIP: d.bindAddress, |
|
| 131 |
+ TunnelEndpointIP: d.advertiseAddress, |
|
| 132 | 132 |
}) |
| 133 | 133 |
if err != nil {
|
| 134 | 134 |
return err |
| ... | ... |
@@ -159,7 +161,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri |
| 159 | 159 |
|
| 160 | 160 |
// Ignore local peers. We already know about them and they |
| 161 | 161 |
// should not be added to vxlan fdb. |
| 162 |
- if peer.TunnelEndpointIP == d.bindAddress {
|
|
| 162 |
+ if peer.TunnelEndpointIP == d.advertiseAddress {
|
|
| 163 | 163 |
return |
| 164 | 164 |
} |
| 165 | 165 |
|
| ... | ... |
@@ -40,7 +40,7 @@ func (d *driver) serfInit() error {
|
| 40 | 40 |
|
| 41 | 41 |
config := serf.DefaultConfig() |
| 42 | 42 |
config.Init() |
| 43 |
- config.MemberlistConfig.BindAddr = d.bindAddress |
|
| 43 |
+ config.MemberlistConfig.BindAddr = d.advertiseAddress |
|
| 44 | 44 |
|
| 45 | 45 |
d.eventCh = make(chan serf.Event, 4) |
| 46 | 46 |
config.EventCh = d.eventCh |
| ... | ... |
@@ -31,22 +31,23 @@ const ( |
| 31 | 31 |
var initVxlanIdm = make(chan (bool), 1) |
| 32 | 32 |
|
| 33 | 33 |
type driver struct {
|
| 34 |
- eventCh chan serf.Event |
|
| 35 |
- notifyCh chan ovNotify |
|
| 36 |
- exitCh chan chan struct{}
|
|
| 37 |
- bindAddress string |
|
| 38 |
- neighIP string |
|
| 39 |
- config map[string]interface{}
|
|
| 40 |
- peerDb peerNetworkMap |
|
| 41 |
- secMap *encrMap |
|
| 42 |
- serfInstance *serf.Serf |
|
| 43 |
- networks networkTable |
|
| 44 |
- store datastore.DataStore |
|
| 45 |
- localStore datastore.DataStore |
|
| 46 |
- vxlanIdm *idm.Idm |
|
| 47 |
- once sync.Once |
|
| 48 |
- joinOnce sync.Once |
|
| 49 |
- keys []*key |
|
| 34 |
+ eventCh chan serf.Event |
|
| 35 |
+ notifyCh chan ovNotify |
|
| 36 |
+ exitCh chan chan struct{}
|
|
| 37 |
+ bindAddress string |
|
| 38 |
+ advertiseAddress string |
|
| 39 |
+ neighIP string |
|
| 40 |
+ config map[string]interface{}
|
|
| 41 |
+ peerDb peerNetworkMap |
|
| 42 |
+ secMap *encrMap |
|
| 43 |
+ serfInstance *serf.Serf |
|
| 44 |
+ networks networkTable |
|
| 45 |
+ store datastore.DataStore |
|
| 46 |
+ localStore datastore.DataStore |
|
| 47 |
+ vxlanIdm *idm.Idm |
|
| 48 |
+ once sync.Once |
|
| 49 |
+ joinOnce sync.Once |
|
| 50 |
+ keys []*key |
|
| 50 | 51 |
sync.Mutex |
| 51 | 52 |
} |
| 52 | 53 |
|
| ... | ... |
@@ -111,7 +112,11 @@ func (d *driver) restoreEndpoints() error {
|
| 111 | 111 |
ep := kvo.(*endpoint) |
| 112 | 112 |
n := d.network(ep.nid) |
| 113 | 113 |
if n == nil {
|
| 114 |
- logrus.Debugf("Network (%s) not found for restored endpoint (%s)", ep.nid, ep.id)
|
|
| 114 |
+ logrus.Debugf("Network (%s) not found for restored endpoint (%s)", ep.nid[0:7], ep.id[0:7])
|
|
| 115 |
+ logrus.Debugf("Deleting stale overlay endpoint (%s) from store", ep.id[0:7])
|
|
| 116 |
+ if err := d.deleteEndpointFromStore(ep); err != nil {
|
|
| 117 |
+ logrus.Debugf("Failed to delete stale overlay endpoint (%s) from store", ep.id[0:7])
|
|
| 118 |
+ } |
|
| 115 | 119 |
continue |
| 116 | 120 |
} |
| 117 | 121 |
n.addEndpoint(ep) |
| ... | ... |
@@ -140,7 +145,7 @@ func (d *driver) restoreEndpoints() error {
|
| 140 | 140 |
} |
| 141 | 141 |
|
| 142 | 142 |
n.incEndpointCount() |
| 143 |
- d.peerDbAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.bindAddress), true) |
|
| 143 |
+ d.peerDbAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true) |
|
| 144 | 144 |
} |
| 145 | 145 |
return nil |
| 146 | 146 |
} |
| ... | ... |
@@ -211,20 +216,25 @@ func validateSelf(node string) error {
|
| 211 | 211 |
return fmt.Errorf("Multi-Host overlay networking requires cluster-advertise(%s) to be configured with a local ip-address that is reachable within the cluster", advIP.String())
|
| 212 | 212 |
} |
| 213 | 213 |
|
| 214 |
-func (d *driver) nodeJoin(node string, self bool) {
|
|
| 214 |
+func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
|
|
| 215 | 215 |
if self && !d.isSerfAlive() {
|
| 216 |
- if err := validateSelf(node); err != nil {
|
|
| 217 |
- logrus.Errorf("%s", err.Error())
|
|
| 218 |
- } |
|
| 219 | 216 |
d.Lock() |
| 220 |
- d.bindAddress = node |
|
| 217 |
+ d.advertiseAddress = advertiseAddress |
|
| 218 |
+ d.bindAddress = bindAddress |
|
| 221 | 219 |
d.Unlock() |
| 222 | 220 |
|
| 223 | 221 |
// If there is no cluster store there is no need to start serf. |
| 224 | 222 |
if d.store != nil {
|
| 223 |
+ if err := validateSelf(advertiseAddress); err != nil {
|
|
| 224 |
+ logrus.Warnf("%s", err.Error())
|
|
| 225 |
+ } |
|
| 225 | 226 |
err := d.serfInit() |
| 226 | 227 |
if err != nil {
|
| 227 | 228 |
logrus.Errorf("initializing serf instance failed: %v", err)
|
| 229 |
+ d.Lock() |
|
| 230 |
+ d.advertiseAddress = "" |
|
| 231 |
+ d.bindAddress = "" |
|
| 232 |
+ d.Unlock() |
|
| 228 | 233 |
return |
| 229 | 234 |
} |
| 230 | 235 |
} |
| ... | ... |
@@ -232,7 +242,7 @@ func (d *driver) nodeJoin(node string, self bool) {
|
| 232 | 232 |
|
| 233 | 233 |
d.Lock() |
| 234 | 234 |
if !self {
|
| 235 |
- d.neighIP = node |
|
| 235 |
+ d.neighIP = advertiseAddress |
|
| 236 | 236 |
} |
| 237 | 237 |
neighIP := d.neighIP |
| 238 | 238 |
d.Unlock() |
| ... | ... |
@@ -246,7 +256,7 @@ func (d *driver) nodeJoin(node string, self bool) {
|
| 246 | 246 |
} |
| 247 | 247 |
}) |
| 248 | 248 |
if err != nil {
|
| 249 |
- logrus.Errorf("joining serf neighbor %s failed: %v", node, err)
|
|
| 249 |
+ logrus.Errorf("joining serf neighbor %s failed: %v", advertiseAddress, err)
|
|
| 250 | 250 |
d.Lock() |
| 251 | 251 |
d.joinOnce = sync.Once{}
|
| 252 | 252 |
d.Unlock() |
| ... | ... |
@@ -286,7 +296,7 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
|
| 286 | 286 |
if !ok || nodeData.Address == "" {
|
| 287 | 287 |
return fmt.Errorf("invalid discovery data")
|
| 288 | 288 |
} |
| 289 |
- d.nodeJoin(nodeData.Address, nodeData.Self) |
|
| 289 |
+ d.nodeJoin(nodeData.Address, nodeData.BindAddress, nodeData.Self) |
|
| 290 | 290 |
case discoverapi.DatastoreConfig: |
| 291 | 291 |
if d.store != nil {
|
| 292 | 292 |
return types.ForbiddenErrorf("cannot accept datastore configuration: Overlay driver has a datastore configured already")
|
| ... | ... |
@@ -113,6 +113,9 @@ func (ec *endpointCnt) updateStore() error {
|
| 113 | 113 |
if store == nil {
|
| 114 | 114 |
return fmt.Errorf("store not found for scope %s on endpoint count update", ec.DataScope())
|
| 115 | 115 |
} |
| 116 |
+ // make a copy of count and n to avoid being overwritten by store.GetObject |
|
| 117 |
+ count := ec.EndpointCnt() |
|
| 118 |
+ n := ec.n |
|
| 116 | 119 |
for {
|
| 117 | 120 |
if err := ec.n.getController().updateToStore(ec); err == nil || err != datastore.ErrKeyModified {
|
| 118 | 121 |
return err |
| ... | ... |
@@ -120,6 +123,10 @@ func (ec *endpointCnt) updateStore() error {
|
| 120 | 120 |
if err := store.GetObject(datastore.Key(ec.Key()...), ec); err != nil {
|
| 121 | 121 |
return fmt.Errorf("could not update the kvobject to latest on endpoint count update: %v", err)
|
| 122 | 122 |
} |
| 123 |
+ ec.Lock() |
|
| 124 |
+ ec.Count = count |
|
| 125 |
+ ec.n = n |
|
| 126 |
+ ec.Unlock() |
|
| 123 | 127 |
} |
| 124 | 128 |
} |
| 125 | 129 |
|
| ... | ... |
@@ -136,7 +143,9 @@ retry: |
| 136 | 136 |
if inc {
|
| 137 | 137 |
ec.Count++ |
| 138 | 138 |
} else {
|
| 139 |
- ec.Count-- |
|
| 139 |
+ if ec.Count > 0 {
|
|
| 140 |
+ ec.Count-- |
|
| 141 |
+ } |
|
| 140 | 142 |
} |
| 141 | 143 |
ec.Unlock() |
| 142 | 144 |
|
| ... | ... |
@@ -1105,9 +1105,13 @@ func (n *network) getSvcRecords(ep *endpoint) []etchosts.Record {
|
| 1105 | 1105 |
} |
| 1106 | 1106 |
|
| 1107 | 1107 |
var recs []etchosts.Record |
| 1108 |
- sr, _ := n.ctrlr.svcRecords[n.id] |
|
| 1108 |
+ |
|
| 1109 | 1109 |
epName := ep.Name() |
| 1110 | 1110 |
|
| 1111 |
+ n.ctrlr.Lock() |
|
| 1112 |
+ sr, _ := n.ctrlr.svcRecords[n.id] |
|
| 1113 |
+ n.ctrlr.Unlock() |
|
| 1114 |
+ |
|
| 1111 | 1115 |
for h, ip := range sr.svcMap {
|
| 1112 | 1116 |
if strings.Split(h, ".")[0] == epName {
|
| 1113 | 1117 |
continue |
| ... | ... |
@@ -81,7 +81,7 @@ func (nDB *NetworkDB) RemoveKey(key []byte) {
|
| 81 | 81 |
func (nDB *NetworkDB) clusterInit() error {
|
| 82 | 82 |
config := memberlist.DefaultLANConfig() |
| 83 | 83 |
config.Name = nDB.config.NodeName |
| 84 |
- config.BindAddr = nDB.config.BindAddr |
|
| 84 |
+ config.AdvertiseAddr = nDB.config.AdvertiseAddr |
|
| 85 | 85 |
|
| 86 | 86 |
if nDB.config.BindPort != 0 {
|
| 87 | 87 |
config.BindPort = nDB.config.BindPort |
| ... | ... |
@@ -107,9 +107,9 @@ type Config struct {
|
| 107 | 107 |
// NodeName is the cluster wide unique name for this node. |
| 108 | 108 |
NodeName string |
| 109 | 109 |
|
| 110 |
- // BindAddr is the local node's IP address that we bind to for |
|
| 110 |
+ // AdvertiseAddr is the node's IP address that we advertise for |
|
| 111 | 111 |
// cluster communication. |
| 112 |
- BindAddr string |
|
| 112 |
+ AdvertiseAddr string |
|
| 113 | 113 |
|
| 114 | 114 |
// BindPort is the local node's port to which we bind to for |
| 115 | 115 |
// cluster communication. |
| ... | ... |
@@ -303,6 +303,7 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If |
| 303 | 303 |
for err = nlh.LinkSetUp(iface); err != nil && cnt < 3; cnt++ {
|
| 304 | 304 |
log.Debugf("retrying link setup because of: %v", err)
|
| 305 | 305 |
time.Sleep(10 * time.Millisecond) |
| 306 |
+ err = nlh.LinkSetUp(iface) |
|
| 306 | 307 |
} |
| 307 | 308 |
if err != nil {
|
| 308 | 309 |
return fmt.Errorf("failed to set link up: %v", err)
|
| ... | ... |
@@ -6,6 +6,7 @@ import ( |
| 6 | 6 |
"net" |
| 7 | 7 |
"os" |
| 8 | 8 |
"os/exec" |
| 9 |
+ "path/filepath" |
|
| 9 | 10 |
"runtime" |
| 10 | 11 |
"strconv" |
| 11 | 12 |
"strings" |
| ... | ... |
@@ -21,7 +22,7 @@ import ( |
| 21 | 21 |
"github.com/vishvananda/netns" |
| 22 | 22 |
) |
| 23 | 23 |
|
| 24 |
-const prefix = "/var/run/docker/netns" |
|
| 24 |
+const defaultPrefix = "/var/run/docker" |
|
| 25 | 25 |
|
| 26 | 26 |
var ( |
| 27 | 27 |
once sync.Once |
| ... | ... |
@@ -30,6 +31,7 @@ var ( |
| 30 | 30 |
gpmWg sync.WaitGroup |
| 31 | 31 |
gpmCleanupPeriod = 60 * time.Second |
| 32 | 32 |
gpmChan = make(chan chan struct{})
|
| 33 |
+ prefix = defaultPrefix |
|
| 33 | 34 |
) |
| 34 | 35 |
|
| 35 | 36 |
// The networkNamespace type is the linux implementation of the Sandbox |
| ... | ... |
@@ -48,12 +50,21 @@ type networkNamespace struct {
|
| 48 | 48 |
sync.Mutex |
| 49 | 49 |
} |
| 50 | 50 |
|
| 51 |
+// SetBasePath sets the base directory prefix for the ns path |
|
| 52 |
+func SetBasePath(path string) {
|
|
| 53 |
+ prefix = path |
|
| 54 |
+} |
|
| 55 |
+ |
|
| 51 | 56 |
func init() {
|
| 52 | 57 |
reexec.Register("netns-create", reexecCreateNamespace)
|
| 53 | 58 |
} |
| 54 | 59 |
|
| 60 |
+func basePath() string {
|
|
| 61 |
+ return filepath.Join(prefix, "netns") |
|
| 62 |
+} |
|
| 63 |
+ |
|
| 55 | 64 |
func createBasePath() {
|
| 56 |
- err := os.MkdirAll(prefix, 0755) |
|
| 65 |
+ err := os.MkdirAll(basePath(), 0755) |
|
| 57 | 66 |
if err != nil {
|
| 58 | 67 |
panic("Could not create net namespace path directory")
|
| 59 | 68 |
} |
| ... | ... |
@@ -142,7 +153,7 @@ func GenerateKey(containerID string) string {
|
| 142 | 142 |
indexStr string |
| 143 | 143 |
tmpkey string |
| 144 | 144 |
) |
| 145 |
- dir, err := ioutil.ReadDir(prefix) |
|
| 145 |
+ dir, err := ioutil.ReadDir(basePath()) |
|
| 146 | 146 |
if err != nil {
|
| 147 | 147 |
return "" |
| 148 | 148 |
} |
| ... | ... |
@@ -172,7 +183,7 @@ func GenerateKey(containerID string) string {
|
| 172 | 172 |
maxLen = len(containerID) |
| 173 | 173 |
} |
| 174 | 174 |
|
| 175 |
- return prefix + "/" + containerID[:maxLen] |
|
| 175 |
+ return basePath() + "/" + containerID[:maxLen] |
|
| 176 | 176 |
} |
| 177 | 177 |
|
| 178 | 178 |
// NewSandbox provides a new sandbox instance created in an os specific way |
| ... | ... |
@@ -413,7 +413,12 @@ func (sb *sandbox) ResolveIP(ip string) string {
|
| 413 | 413 |
for _, ep := range sb.getConnectedEndpoints() {
|
| 414 | 414 |
n := ep.getNetwork() |
| 415 | 415 |
|
| 416 |
- sr, ok := n.getController().svcRecords[n.ID()] |
|
| 416 |
+ c := n.getController() |
|
| 417 |
+ |
|
| 418 |
+ c.Lock() |
|
| 419 |
+ sr, ok := c.svcRecords[n.ID()] |
|
| 420 |
+ c.Unlock() |
|
| 421 |
+ |
|
| 417 | 422 |
if !ok {
|
| 418 | 423 |
continue |
| 419 | 424 |
} |
| ... | ... |
@@ -454,7 +459,12 @@ func (sb *sandbox) ResolveService(name string) ([]*net.SRV, []net.IP, error) {
|
| 454 | 454 |
for _, ep := range sb.getConnectedEndpoints() {
|
| 455 | 455 |
n := ep.getNetwork() |
| 456 | 456 |
|
| 457 |
- sr, ok := n.getController().svcRecords[n.ID()] |
|
| 457 |
+ c := n.getController() |
|
| 458 |
+ |
|
| 459 |
+ c.Lock() |
|
| 460 |
+ sr, ok := c.svcRecords[n.ID()] |
|
| 461 |
+ c.Unlock() |
|
| 462 |
+ |
|
| 458 | 463 |
if !ok {
|
| 459 | 464 |
continue |
| 460 | 465 |
} |
| ... | ... |
@@ -575,7 +585,11 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin |
| 575 | 575 |
ep.Unlock() |
| 576 | 576 |
} |
| 577 | 577 |
|
| 578 |
- sr, ok := n.getController().svcRecords[n.ID()] |
|
| 578 |
+ c := n.getController() |
|
| 579 |
+ c.Lock() |
|
| 580 |
+ sr, ok := c.svcRecords[n.ID()] |
|
| 581 |
+ c.Unlock() |
|
| 582 |
+ |
|
| 579 | 583 |
if !ok {
|
| 580 | 584 |
continue |
| 581 | 585 |
} |
| ... | ... |
@@ -15,7 +15,7 @@ import ( |
| 15 | 15 |
"github.com/opencontainers/runc/libcontainer/configs" |
| 16 | 16 |
) |
| 17 | 17 |
|
| 18 |
-const udsBase = "/var/lib/docker/network/files/" |
|
| 18 |
+const udsBase = "/run/docker/libnetwork/" |
|
| 19 | 19 |
const success = "success" |
| 20 | 20 |
|
| 21 | 21 |
// processSetKeyReexec is a private function that must be called only on an reexec path |
| ... | ... |
@@ -8,6 +8,7 @@ import ( |
| 8 | 8 |
"syscall" |
| 9 | 9 |
|
| 10 | 10 |
"github.com/vishvananda/netlink/nl" |
| 11 |
+ "github.com/vishvananda/netns" |
|
| 11 | 12 |
) |
| 12 | 13 |
|
| 13 | 14 |
// IFA_FLAGS is a u32 attribute. |
| ... | ... |
@@ -192,7 +193,17 @@ type AddrUpdate struct {
|
| 192 | 192 |
// AddrSubscribe takes a chan down which notifications will be sent |
| 193 | 193 |
// when addresses change. Close the 'done' chan to stop subscription. |
| 194 | 194 |
func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error {
|
| 195 |
- s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR) |
|
| 195 |
+ return addrSubscribe(netns.None(), netns.None(), ch, done) |
|
| 196 |
+} |
|
| 197 |
+ |
|
| 198 |
+// AddrSubscribeAt works like AddrSubscribe plus it allows the caller |
|
| 199 |
+// to choose the network namespace in which to subscribe (ns). |
|
| 200 |
+func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
|
|
| 201 |
+ return addrSubscribe(ns, netns.None(), ch, done) |
|
| 202 |
+} |
|
| 203 |
+ |
|
| 204 |
+func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
|
|
| 205 |
+ s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR) |
|
| 196 | 206 |
if err != nil {
|
| 197 | 207 |
return err |
| 198 | 208 |
} |
| ... | ... |
@@ -143,7 +143,7 @@ func (h *Handle) FilterAdd(filter Filter) error {
|
| 143 | 143 |
if u32.RedirIndex != 0 {
|
| 144 | 144 |
u32.Actions = append([]Action{NewMirredAction(u32.RedirIndex)}, u32.Actions...)
|
| 145 | 145 |
} |
| 146 |
- if err := encodeActions(actionsAttr, u32.Actions); err != nil {
|
|
| 146 |
+ if err := EncodeActions(actionsAttr, u32.Actions); err != nil {
|
|
| 147 | 147 |
return err |
| 148 | 148 |
} |
| 149 | 149 |
} else if fw, ok := filter.(*Fw); ok {
|
| ... | ... |
@@ -309,7 +309,7 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) {
|
| 309 | 309 |
attrs.Bindcnt = int(tcgen.Bindcnt) |
| 310 | 310 |
} |
| 311 | 311 |
|
| 312 |
-func encodeActions(attr *nl.RtAttr, actions []Action) error {
|
|
| 312 |
+func EncodeActions(attr *nl.RtAttr, actions []Action) error {
|
|
| 313 | 313 |
tabIndex := int(nl.TCA_ACT_TAB) |
| 314 | 314 |
|
| 315 | 315 |
for _, action := range actions {
|
| ... | ... |
@@ -10,6 +10,7 @@ import ( |
| 10 | 10 |
"unsafe" |
| 11 | 11 |
|
| 12 | 12 |
"github.com/vishvananda/netlink/nl" |
| 13 |
+ "github.com/vishvananda/netns" |
|
| 13 | 14 |
) |
| 14 | 15 |
|
| 15 | 16 |
const SizeofLinkStats = 0x5c |
| ... | ... |
@@ -425,7 +426,7 @@ func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) {
|
| 425 | 425 |
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum)) |
| 426 | 426 |
} |
| 427 | 427 |
if vxlan.GBP {
|
| 428 |
- nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, boolAttr(vxlan.GBP)) |
|
| 428 |
+ nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, []byte{})
|
|
| 429 | 429 |
} |
| 430 | 430 |
if vxlan.NoAge {
|
| 431 | 431 |
nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0)) |
| ... | ... |
@@ -1011,7 +1012,17 @@ type LinkUpdate struct {
|
| 1011 | 1011 |
// LinkSubscribe takes a chan down which notifications will be sent |
| 1012 | 1012 |
// when links change. Close the 'done' chan to stop subscription. |
| 1013 | 1013 |
func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
|
| 1014 |
- s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK) |
|
| 1014 |
+ return linkSubscribe(netns.None(), netns.None(), ch, done) |
|
| 1015 |
+} |
|
| 1016 |
+ |
|
| 1017 |
+// LinkSubscribeAt works like LinkSubscribe plus it allows the caller |
|
| 1018 |
+// to choose the network namespace in which to subscribe (ns). |
|
| 1019 |
+func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error {
|
|
| 1020 |
+ return linkSubscribe(ns, netns.None(), ch, done) |
|
| 1021 |
+} |
|
| 1022 |
+ |
|
| 1023 |
+func linkSubscribe(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error {
|
|
| 1024 |
+ s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK) |
|
| 1015 | 1025 |
if err != nil {
|
| 1016 | 1026 |
return err |
| 1017 | 1027 |
} |
| ... | ... |
@@ -1152,7 +1163,7 @@ func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) {
|
| 1152 | 1152 |
case nl.IFLA_VXLAN_UDP_CSUM: |
| 1153 | 1153 |
vxlan.UDPCSum = int8(datum.Value[0]) != 0 |
| 1154 | 1154 |
case nl.IFLA_VXLAN_GBP: |
| 1155 |
- vxlan.GBP = int8(datum.Value[0]) != 0 |
|
| 1155 |
+ vxlan.GBP = true |
|
| 1156 | 1156 |
case nl.IFLA_VXLAN_AGEING: |
| 1157 | 1157 |
vxlan.Age = int(native.Uint32(datum.Value[0:4])) |
| 1158 | 1158 |
vxlan.NoAge = vxlan.Age == 0 |
| ... | ... |
@@ -331,24 +331,63 @@ func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
|
| 331 | 331 |
// moves back into it when done. If newNs is closed, the socket will be opened |
| 332 | 332 |
// in the current network namespace. |
| 333 | 333 |
func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSocket, error) {
|
| 334 |
- var err error |
|
| 334 |
+ c, err := executeInNetns(newNs, curNs) |
|
| 335 |
+ if err != nil {
|
|
| 336 |
+ return nil, err |
|
| 337 |
+ } |
|
| 338 |
+ defer c() |
|
| 339 |
+ return getNetlinkSocket(protocol) |
|
| 340 |
+} |
|
| 335 | 341 |
|
| 342 |
+// executeInNetns sets execution of the code following this call to the |
|
| 343 |
+// network namespace newNs, then moves the thread back to curNs if open, |
|
| 344 |
+// otherwise to the current netns at the time the function was invoked. |
|
| 345 |
+// In case of success, the caller is expected to execute the returned function |
|
| 346 |
+// at the end of the code that needs to be executed in the network namespace. |
|
| 347 |
+// Example: |
|
| 348 |
+// func jobAt(...) error {
|
|
| 349 |
+// d, err := executeInNetns(...) |
|
| 350 |
+// if err != nil { return err}
|
|
| 351 |
+// defer d() |
|
| 352 |
+// < code which needs to be executed in specific netns> |
|
| 353 |
+// } |
|
| 354 |
+// TODO: this function probably belongs to netns pkg. |
|
| 355 |
+func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
|
|
| 356 |
+ var ( |
|
| 357 |
+ err error |
|
| 358 |
+ moveBack func(netns.NsHandle) error |
|
| 359 |
+ closeNs func() error |
|
| 360 |
+ unlockThd func() |
|
| 361 |
+ ) |
|
| 362 |
+ restore := func() {
|
|
| 363 |
+ // order matters |
|
| 364 |
+ if moveBack != nil {
|
|
| 365 |
+ moveBack(curNs) |
|
| 366 |
+ } |
|
| 367 |
+ if closeNs != nil {
|
|
| 368 |
+ closeNs() |
|
| 369 |
+ } |
|
| 370 |
+ if unlockThd != nil {
|
|
| 371 |
+ unlockThd() |
|
| 372 |
+ } |
|
| 373 |
+ } |
|
| 336 | 374 |
if newNs.IsOpen() {
|
| 337 | 375 |
runtime.LockOSThread() |
| 338 |
- defer runtime.UnlockOSThread() |
|
| 376 |
+ unlockThd = runtime.UnlockOSThread |
|
| 339 | 377 |
if !curNs.IsOpen() {
|
| 340 | 378 |
if curNs, err = netns.Get(); err != nil {
|
| 379 |
+ restore() |
|
| 341 | 380 |
return nil, fmt.Errorf("could not get current namespace while creating netlink socket: %v", err)
|
| 342 | 381 |
} |
| 343 |
- defer curNs.Close() |
|
| 382 |
+ closeNs = curNs.Close |
|
| 344 | 383 |
} |
| 345 | 384 |
if err := netns.Set(newNs); err != nil {
|
| 385 |
+ restore() |
|
| 346 | 386 |
return nil, fmt.Errorf("failed to set into network namespace %d while creating netlink socket: %v", newNs, err)
|
| 347 | 387 |
} |
| 348 |
- defer netns.Set(curNs) |
|
| 388 |
+ moveBack = netns.Set |
|
| 349 | 389 |
} |
| 350 |
- |
|
| 351 |
- return getNetlinkSocket(protocol) |
|
| 390 |
+ return restore, nil |
|
| 352 | 391 |
} |
| 353 | 392 |
|
| 354 | 393 |
// Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE) |
| ... | ... |
@@ -377,6 +416,18 @@ func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) {
|
| 377 | 377 |
return s, nil |
| 378 | 378 |
} |
| 379 | 379 |
|
| 380 |
+// SubscribeAt works like Subscribe plus lets the caller choose the network |
|
| 381 |
+// namespace in which the socket would be opened (newNs). Then control goes back |
|
| 382 |
+// to curNs if open, otherwise to the netns at the time this function was called. |
|
| 383 |
+func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*NetlinkSocket, error) {
|
|
| 384 |
+ c, err := executeInNetns(newNs, curNs) |
|
| 385 |
+ if err != nil {
|
|
| 386 |
+ return nil, err |
|
| 387 |
+ } |
|
| 388 |
+ defer c() |
|
| 389 |
+ return Subscribe(protocol, groups...) |
|
| 390 |
+} |
|
| 391 |
+ |
|
| 380 | 392 |
func (s *NetlinkSocket) Close() {
|
| 381 | 393 |
syscall.Close(s.fd) |
| 382 | 394 |
s.fd = -1 |
| ... | ... |
@@ -10,6 +10,7 @@ const ( |
| 10 | 10 |
SizeofXfrmUsersaInfo = 0xe0 |
| 11 | 11 |
SizeofXfrmAlgo = 0x44 |
| 12 | 12 |
SizeofXfrmAlgoAuth = 0x48 |
| 13 |
+ SizeofXfrmAlgoAEAD = 0x48 |
|
| 13 | 14 |
SizeofXfrmEncapTmpl = 0x18 |
| 14 | 15 |
SizeofXfrmUsersaFlush = 0x8 |
| 15 | 16 |
) |
| ... | ... |
@@ -194,6 +195,35 @@ func (msg *XfrmAlgoAuth) Serialize() []byte {
|
| 194 | 194 |
// char alg_key[0]; |
| 195 | 195 |
// } |
| 196 | 196 |
|
| 197 |
+type XfrmAlgoAEAD struct {
|
|
| 198 |
+ AlgName [64]byte |
|
| 199 |
+ AlgKeyLen uint32 |
|
| 200 |
+ AlgICVLen uint32 |
|
| 201 |
+ AlgKey []byte |
|
| 202 |
+} |
|
| 203 |
+ |
|
| 204 |
+func (msg *XfrmAlgoAEAD) Len() int {
|
|
| 205 |
+ return SizeofXfrmAlgoAEAD + int(msg.AlgKeyLen/8) |
|
| 206 |
+} |
|
| 207 |
+ |
|
| 208 |
+func DeserializeXfrmAlgoAEAD(b []byte) *XfrmAlgoAEAD {
|
|
| 209 |
+ ret := XfrmAlgoAEAD{}
|
|
| 210 |
+ copy(ret.AlgName[:], b[0:64]) |
|
| 211 |
+ ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64])) |
|
| 212 |
+ ret.AlgICVLen = *(*uint32)(unsafe.Pointer(&b[68])) |
|
| 213 |
+ ret.AlgKey = b[72:ret.Len()] |
|
| 214 |
+ return &ret |
|
| 215 |
+} |
|
| 216 |
+ |
|
| 217 |
+func (msg *XfrmAlgoAEAD) Serialize() []byte {
|
|
| 218 |
+ b := make([]byte, msg.Len()) |
|
| 219 |
+ copy(b[0:64], msg.AlgName[:]) |
|
| 220 |
+ copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:]) |
|
| 221 |
+ copy(b[68:72], (*(*[4]byte)(unsafe.Pointer(&msg.AlgICVLen)))[:]) |
|
| 222 |
+ copy(b[72:msg.Len()], msg.AlgKey[:]) |
|
| 223 |
+ return b |
|
| 224 |
+} |
|
| 225 |
+ |
|
| 197 | 226 |
// struct xfrm_encap_tmpl {
|
| 198 | 227 |
// __u16 encap_type; |
| 199 | 228 |
// __be16 encap_sport; |
| ... | ... |
@@ -6,6 +6,7 @@ import ( |
| 6 | 6 |
"syscall" |
| 7 | 7 |
|
| 8 | 8 |
"github.com/vishvananda/netlink/nl" |
| 9 |
+ "github.com/vishvananda/netns" |
|
| 9 | 10 |
) |
| 10 | 11 |
|
| 11 | 12 |
// RtAttr is shared so it is in netlink_linux.go |
| ... | ... |
@@ -421,7 +422,17 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
|
| 421 | 421 |
// RouteSubscribe takes a chan down which notifications will be sent |
| 422 | 422 |
// when routes are added or deleted. Close the 'done' chan to stop subscription. |
| 423 | 423 |
func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
|
| 424 |
- s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE) |
|
| 424 |
+ return routeSubscribeAt(netns.None(), netns.None(), ch, done) |
|
| 425 |
+} |
|
| 426 |
+ |
|
| 427 |
+// RouteSubscribeAt works like RouteSubscribe plus it allows the caller |
|
| 428 |
+// to choose the network namespace in which to subscribe (ns). |
|
| 429 |
+func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
|
|
| 430 |
+ return routeSubscribeAt(ns, netns.None(), ch, done) |
|
| 431 |
+} |
|
| 432 |
+ |
|
| 433 |
+func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
|
|
| 434 |
+ s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE) |
|
| 425 | 435 |
if err != nil {
|
| 426 | 436 |
return err |
| 427 | 437 |
} |
| ... | ... |
@@ -10,10 +10,18 @@ type XfrmStateAlgo struct {
|
| 10 | 10 |
Name string |
| 11 | 11 |
Key []byte |
| 12 | 12 |
TruncateLen int // Auth only |
| 13 |
+ ICVLen int // AEAD only |
|
| 13 | 14 |
} |
| 14 | 15 |
|
| 15 | 16 |
func (a XfrmStateAlgo) String() string {
|
| 16 |
- return fmt.Sprintf("{Name: %s, Key: 0x%x, TruncateLen: %d}", a.Name, a.Key, a.TruncateLen)
|
|
| 17 |
+ base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key)
|
|
| 18 |
+ if a.TruncateLen != 0 {
|
|
| 19 |
+ base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen)
|
|
| 20 |
+ } |
|
| 21 |
+ if a.ICVLen != 0 {
|
|
| 22 |
+ base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen)
|
|
| 23 |
+ } |
|
| 24 |
+ return fmt.Sprintf("%s}", base)
|
|
| 17 | 25 |
} |
| 18 | 26 |
|
| 19 | 27 |
// EncapType is an enum representing the optional packet encapsulation. |
| ... | ... |
@@ -73,12 +81,13 @@ type XfrmState struct {
|
| 73 | 73 |
Mark *XfrmMark |
| 74 | 74 |
Auth *XfrmStateAlgo |
| 75 | 75 |
Crypt *XfrmStateAlgo |
| 76 |
+ Aead *XfrmStateAlgo |
|
| 76 | 77 |
Encap *XfrmStateEncap |
| 77 | 78 |
} |
| 78 | 79 |
|
| 79 | 80 |
func (sa XfrmState) String() string {
|
| 80 |
- return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Encap: %v",
|
|
| 81 |
- sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Encap) |
|
| 81 |
+ return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v,Encap: %v",
|
|
| 82 |
+ sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap) |
|
| 82 | 83 |
} |
| 83 | 84 |
func (sa XfrmState) Print(stats bool) string {
|
| 84 | 85 |
if !stats {
|
| ... | ... |
@@ -35,6 +35,20 @@ func writeStateAlgoAuth(a *XfrmStateAlgo) []byte {
|
| 35 | 35 |
return algo.Serialize() |
| 36 | 36 |
} |
| 37 | 37 |
|
| 38 |
+func writeStateAlgoAead(a *XfrmStateAlgo) []byte {
|
|
| 39 |
+ algo := nl.XfrmAlgoAEAD{
|
|
| 40 |
+ AlgKeyLen: uint32(len(a.Key) * 8), |
|
| 41 |
+ AlgICVLen: uint32(a.ICVLen), |
|
| 42 |
+ AlgKey: a.Key, |
|
| 43 |
+ } |
|
| 44 |
+ end := len(a.Name) |
|
| 45 |
+ if end > 64 {
|
|
| 46 |
+ end = 64 |
|
| 47 |
+ } |
|
| 48 |
+ copy(algo.AlgName[:end], a.Name) |
|
| 49 |
+ return algo.Serialize() |
|
| 50 |
+} |
|
| 51 |
+ |
|
| 38 | 52 |
func writeMark(m *XfrmMark) []byte {
|
| 39 | 53 |
mark := &nl.XfrmMark{
|
| 40 | 54 |
Value: m.Value, |
| ... | ... |
@@ -97,6 +111,10 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
|
| 97 | 97 |
out := nl.NewRtAttr(nl.XFRMA_ALG_CRYPT, writeStateAlgo(state.Crypt)) |
| 98 | 98 |
req.AddData(out) |
| 99 | 99 |
} |
| 100 |
+ if state.Aead != nil {
|
|
| 101 |
+ out := nl.NewRtAttr(nl.XFRMA_ALG_AEAD, writeStateAlgoAead(state.Aead)) |
|
| 102 |
+ req.AddData(out) |
|
| 103 |
+ } |
|
| 100 | 104 |
if state.Encap != nil {
|
| 101 | 105 |
encapData := make([]byte, nl.SizeofXfrmEncapTmpl) |
| 102 | 106 |
encap := nl.DeserializeXfrmEncapTmpl(encapData) |
| ... | ... |
@@ -271,6 +289,12 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
|
| 271 | 271 |
state.Auth.Name = nl.BytesToString(algo.AlgName[:]) |
| 272 | 272 |
state.Auth.Key = algo.AlgKey |
| 273 | 273 |
state.Auth.TruncateLen = int(algo.AlgTruncLen) |
| 274 |
+ case nl.XFRMA_ALG_AEAD: |
|
| 275 |
+ state.Aead = new(XfrmStateAlgo) |
|
| 276 |
+ algo := nl.DeserializeXfrmAlgoAEAD(attr.Value[:]) |
|
| 277 |
+ state.Aead.Name = nl.BytesToString(algo.AlgName[:]) |
|
| 278 |
+ state.Aead.Key = algo.AlgKey |
|
| 279 |
+ state.Aead.ICVLen = int(algo.AlgICVLen) |
|
| 274 | 280 |
case nl.XFRMA_ENCAP: |
| 275 | 281 |
encap := nl.DeserializeXfrmEncapTmpl(attr.Value[:]) |
| 276 | 282 |
state.Encap = new(XfrmStateEncap) |