Fixing a case of a dangling endpoint during an ungraceful daemon restart
| ... | ... |
@@ -893,6 +893,13 @@ func (container *Container) buildCreateEndpointOptions(n libnetwork.Network) ([] |
| 893 | 893 |
} |
| 894 | 894 |
|
| 895 | 895 |
func (container *Container) allocateNetwork() error {
|
| 896 |
+ sb := container.getNetworkSandbox() |
|
| 897 |
+ if sb != nil {
|
|
| 898 |
+ // Clean up any stale sandbox left over due to an ungraceful daemon shutdown |
|
| 899 |
+ if err := sb.Delete(); err != nil {
|
|
| 900 |
+ logrus.Errorf("failed to cleanup up stale network sandbox for container %s", container.ID)
|
|
| 901 |
+ } |
|
| 902 |
+ } |
|
| 896 | 903 |
updateSettings := false |
| 897 | 904 |
if len(container.NetworkSettings.Networks) == 0 {
|
| 898 | 905 |
mode := container.hostConfig.NetworkMode |
| ... | ... |
@@ -919,6 +926,18 @@ func (container *Container) allocateNetwork() error {
|
| 919 | 919 |
return container.writeHostConfig() |
| 920 | 920 |
} |
| 921 | 921 |
|
| 922 |
+func (container *Container) getNetworkSandbox() libnetwork.Sandbox {
|
|
| 923 |
+ var sb libnetwork.Sandbox |
|
| 924 |
+ container.daemon.netController.WalkSandboxes(func(s libnetwork.Sandbox) bool {
|
|
| 925 |
+ if s.ContainerID() == container.ID {
|
|
| 926 |
+ sb = s |
|
| 927 |
+ return true |
|
| 928 |
+ } |
|
| 929 |
+ return false |
|
| 930 |
+ }) |
|
| 931 |
+ return sb |
|
| 932 |
+} |
|
| 933 |
+ |
|
| 922 | 934 |
// ConnectToNetwork connects a container to a network.
| 923 | 935 |
func (container *Container) ConnectToNetwork(idOrName string) error {
|
| 924 | 936 |
if !container.Running {
|
| ... | ... |
@@ -984,14 +1003,7 @@ func (container *Container) connectToNetwork(idOrName string, updateSettings boo |
| 984 | 984 |
return err |
| 985 | 985 |
} |
| 986 | 986 |
|
| 987 |
- var sb libnetwork.Sandbox |
|
| 988 |
- controller.WalkSandboxes(func(s libnetwork.Sandbox) bool {
|
|
| 989 |
- if s.ContainerID() == container.ID {
|
|
| 990 |
- sb = s |
|
| 991 |
- return true |
|
| 992 |
- } |
|
| 993 |
- return false |
|
| 994 |
- }) |
|
| 987 |
+ sb := container.getNetworkSandbox() |
|
| 995 | 988 |
if sb == nil {
|
| 996 | 989 |
options, err := container.buildSandboxOptions(n) |
| 997 | 990 |
if err != nil {
|
| ... | ... |
@@ -21,7 +21,7 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b |
| 21 | 21 |
clone git golang.org/x/net 3cffabab72adf04f8e3b01c5baf775361837b5fe https://github.com/golang/net.git |
| 22 | 22 |
|
| 23 | 23 |
#get libnetwork packages |
| 24 |
-clone git github.com/docker/libnetwork 20351a84241aa1278493d74492db947336989be6 |
|
| 24 |
+clone git github.com/docker/libnetwork 5fc6ba506daa7914f4d58befb38480ec8e9c9f70 |
|
| 25 | 25 |
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec |
| 26 | 26 |
clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b |
| 27 | 27 |
clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4 |
| ... | ... |
@@ -118,6 +118,12 @@ func (d *driver) Leave(nid, eid string) error {
|
| 118 | 118 |
return fmt.Errorf("could not find network with id %s", nid)
|
| 119 | 119 |
} |
| 120 | 120 |
|
| 121 |
+ ep := n.endpoint(eid) |
|
| 122 |
+ |
|
| 123 |
+ if ep == nil {
|
|
| 124 |
+ return types.InternalMaskableErrorf("could not find endpoint with id %s", eid)
|
|
| 125 |
+ } |
|
| 126 |
+ |
|
| 121 | 127 |
if d.notifyCh != nil {
|
| 122 | 128 |
d.notifyCh <- ovNotify{
|
| 123 | 129 |
action: "leave", |
| ... | ... |
@@ -168,6 +168,7 @@ func (sb *sandbox) Delete() error {
|
| 168 | 168 |
c := sb.controller |
| 169 | 169 |
|
| 170 | 170 |
// Detach from all endpoints |
| 171 |
+ retain := false |
|
| 171 | 172 |
for _, ep := range sb.getConnectedEndpoints() {
|
| 172 | 173 |
// endpoint in the Gateway network will be cleaned up |
| 173 | 174 |
// when sandbox no longer needs external connectivity
| ... | ... |
@@ -176,14 +177,22 @@ func (sb *sandbox) Delete() error {
|
| 176 | 176 |
} |
| 177 | 177 |
|
| 178 | 178 |
if err := ep.Leave(sb); err != nil {
|
| 179 |
+ retain = true |
|
| 179 | 180 |
log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
|
| 180 | 181 |
} |
| 181 | 182 |
|
| 182 | 183 |
if err := ep.Delete(); err != nil {
|
| 184 |
+ retain = true |
|
| 183 | 185 |
log.Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err)
|
| 184 | 186 |
} |
| 185 | 187 |
} |
| 186 | 188 |
|
| 189 |
+ if retain {
|
|
| 190 |
+ sb.Lock() |
|
| 191 |
+ sb.inDelete = false |
|
| 192 |
+ sb.Unlock() |
|
| 193 |
+ return fmt.Errorf("could not cleanup all the endpoints in container %s / sandbox %s", sb.containerID, sb.id)
|
|
| 194 |
+ } |
|
| 187 | 195 |
// Container is going away. Path cache in etchosts is most |
| 188 | 196 |
// likely not required any more. Drop it. |
| 189 | 197 |
etchosts.Drop(sb.config.hostsPath) |
| ... | ... |
@@ -3,6 +3,7 @@ package libnetwork |
| 3 | 3 |
import ( |
| 4 | 4 |
"container/heap" |
| 5 | 5 |
"encoding/json" |
| 6 |
+ "sync" |
|
| 6 | 7 |
|
| 7 | 8 |
"github.com/Sirupsen/logrus" |
| 8 | 9 |
"github.com/docker/libnetwork/datastore" |
| ... | ... |
@@ -119,8 +120,9 @@ func (sbs *sbState) DataScope() string {
|
| 119 | 119 |
|
| 120 | 120 |
func (sb *sandbox) storeUpdate() error {
|
| 121 | 121 |
sbs := &sbState{
|
| 122 |
- c: sb.controller, |
|
| 123 |
- ID: sb.id, |
|
| 122 |
+ c: sb.controller, |
|
| 123 |
+ ID: sb.id, |
|
| 124 |
+ Cid: sb.containerID, |
|
| 124 | 125 |
} |
| 125 | 126 |
|
| 126 | 127 |
retry: |
| ... | ... |
@@ -197,15 +199,17 @@ func (c *controller) sandboxCleanup() {
|
| 197 | 197 |
|
| 198 | 198 |
for _, eps := range sbs.Eps {
|
| 199 | 199 |
n, err := c.getNetworkFromStore(eps.Nid) |
| 200 |
+ var ep *endpoint |
|
| 200 | 201 |
if err != nil {
|
| 201 | 202 |
logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err)
|
| 202 |
- continue |
|
| 203 |
- } |
|
| 204 |
- |
|
| 205 |
- ep, err := n.getEndpointFromStore(eps.Eid) |
|
| 206 |
- if err != nil {
|
|
| 207 |
- logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err)
|
|
| 208 |
- continue |
|
| 203 |
+ n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}}
|
|
| 204 |
+ ep = &endpoint{id: eps.Eid, network: n}
|
|
| 205 |
+ } else {
|
|
| 206 |
+ ep, err = n.getEndpointFromStore(eps.Eid) |
|
| 207 |
+ if err != nil {
|
|
| 208 |
+ logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err)
|
|
| 209 |
+ ep = &endpoint{id: eps.Eid, network: n}
|
|
| 210 |
+ } |
|
| 209 | 211 |
} |
| 210 | 212 |
|
| 211 | 213 |
heap.Push(&sb.endpoints, ep) |