Browse code

Wait for device removal if deferredRemoval=true and deferredDeletion=false

There have been some cases where umount, a device can be busy for a very
short duration. Maybe its udev rules, or maybe it is runc related races
or probably it is something else. We don't know yet.

If deferred removal is enabled but deferred deletion is not, then for the
case of "docker run -ti --rm fedora bash", a container will exit, device
will be deferred removed and then immediately a call will come to delete
the device. It is possible that deletion will fail if device was busy
at that time.

A device can't be deleted if it can't be removed/deactivated first. There
is only one exception and that is when deferred deletion is on. In that
case graph driver will keep track of deleted device and try to delete it
later and return success to caller.

Always make sure that device deactivation is synchronous when device is
being deleted (except the case when deferred deletion is enabled).

This should also take care of small races when device is busy for a short
duration and it is being deleted.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>

Vivek Goyal authored on 2017/07/01 03:27:26
Showing 2 changed files
... ...
@@ -2088,7 +2088,16 @@ func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error {
2088 2088
 	}
2089 2089
 
2090 2090
 	// Try to deactivate device in case it is active.
2091
-	if err := devices.deactivateDevice(info); err != nil {
2091
+	// If deferred removal is enabled and deferred deletion is disabled
2092
+	// then make sure device is removed synchronously. There have been
2093
+	// some cases of device being busy for short duration and we would
2094
+	// rather busy wait for device removal to take care of these cases.
2095
+	deferredRemove := devices.deferredRemove
2096
+	if !devices.deferredDelete {
2097
+		deferredRemove = false
2098
+	}
2099
+
2100
+	if err := devices.deactivateDeviceMode(info, deferredRemove); err != nil {
2092 2101
 		logrus.Debugf("devmapper: Error deactivating device: %s", err)
2093 2102
 		return err
2094 2103
 	}
... ...
@@ -2145,6 +2154,11 @@ func (devices *DeviceSet) deactivatePool() error {
2145 2145
 }
2146 2146
 
2147 2147
 func (devices *DeviceSet) deactivateDevice(info *devInfo) error {
2148
+	return devices.deactivateDeviceMode(info, devices.deferredRemove)
2149
+}
2150
+
2151
+func (devices *DeviceSet) deactivateDeviceMode(info *devInfo, deferredRemove bool) error {
2152
+	var err error
2148 2153
 	logrus.Debugf("devmapper: deactivateDevice START(%s)", info.Hash)
2149 2154
 	defer logrus.Debugf("devmapper: deactivateDevice END(%s)", info.Hash)
2150 2155
 
... ...
@@ -2157,14 +2171,17 @@ func (devices *DeviceSet) deactivateDevice(info *devInfo) error {
2157 2157
 		return nil
2158 2158
 	}
2159 2159
 
2160
-	if devices.deferredRemove {
2161
-		if err := devicemapper.RemoveDeviceDeferred(info.Name()); err != nil {
2162
-			return err
2163
-		}
2160
+	if deferredRemove {
2161
+		err = devicemapper.RemoveDeviceDeferred(info.Name())
2164 2162
 	} else {
2165
-		if err := devices.removeDevice(info.Name()); err != nil {
2166
-			return err
2167
-		}
2163
+		err = devices.removeDevice(info.Name())
2164
+	}
2165
+
2166
+	// This function's semantics is such that it does not return an
2167
+	// error if device does not exist. So if device went away by
2168
+	// the time we actually tried to remove it, do not return error.
2169
+	if err != devicemapper.ErrEnxio {
2170
+		return err
2168 2171
 	}
2169 2172
 	return nil
2170 2173
 }
... ...
@@ -336,10 +336,14 @@ func RemoveDevice(name string) error {
336 336
 	defer UdevWait(cookie)
337 337
 
338 338
 	dmSawBusy = false // reset before the task is run
339
+	dmSawEnxio = false
339 340
 	if err = task.run(); err != nil {
340 341
 		if dmSawBusy {
341 342
 			return ErrBusy
342 343
 		}
344
+		if dmSawEnxio {
345
+			return ErrEnxio
346
+		}
343 347
 		return fmt.Errorf("devicemapper: Error running RemoveDevice %s", err)
344 348
 	}
345 349
 
... ...
@@ -380,7 +384,11 @@ func RemoveDeviceDeferred(name string) error {
380 380
 	// by udev, what UdevWait is just cleaning up the semaphore.
381 381
 	defer UdevWait(cookie)
382 382
 
383
+	dmSawEnxio = false
383 384
 	if err = task.run(); err != nil {
385
+		if dmSawEnxio {
386
+			return ErrEnxio
387
+		}
384 388
 		return fmt.Errorf("devicemapper: Error running RemoveDeviceDeferred %s", err)
385 389
 	}
386 390