Browse code

devmapper: Implement deferred deletion functionality

Finally, here is the patch that implements deferred deletion functionality.
Devices whose deletion has been deferred are marked as "Deleted" in the device metadata file.

First we try to delete the device. Only if that deletion fails because the device is
busy, and the user has enabled deferred deletion, is the device marked for deferred deletion.

When docker starts up again, we go through the list of devices marked as deleted and
try to delete them again.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>

Vivek Goyal authored on 2015/10/07 06:37:21
Showing 3 changed files
... ...
@@ -58,6 +58,7 @@ type devInfo struct {
58 58
 	Size          uint64 `json:"size"`
59 59
 	TransactionID uint64 `json:"transaction_id"`
60 60
 	Initialized   bool   `json:"initialized"`
61
+	Deleted       bool   `json:"deleted"`
61 62
 	devices       *DeviceSet
62 63
 
63 64
 	mountCount int
... ...
@@ -425,6 +426,8 @@ func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo)
425 425
 		hash = ""
426 426
 	}
427 427
 
428
+	// Include deleted devices also as cleanup delete device logic
429
+	// will go through it and see if there are any deleted devices.
428 430
 	if _, err := devices.lookupDevice(hash); err != nil {
429 431
 		return fmt.Errorf("Error looking up device %s:%v", hash, err)
430 432
 	}
... ...
@@ -494,9 +497,13 @@ func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, trans
494 494
 	return info, nil
495 495
 }
496 496
 
497
-func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo) error {
497
+func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bool) error {
498 498
 	logrus.Debugf("activateDeviceIfNeeded(%v)", info.Hash)
499 499
 
500
+	if info.Deleted && !ignoreDeleted {
501
+		return fmt.Errorf("devmapper: Can't activate device %v as it is marked for deletion", info.Hash)
502
+	}
503
+
500 504
 	// Make sure deferred removal on device is canceled, if one was
501 505
 	// scheduled.
502 506
 	if err := devices.cancelDeferredRemoval(info); err != nil {
... ...
@@ -570,6 +577,35 @@ func (devices *DeviceSet) migrateOldMetaData() error {
570 570
 	return nil
571 571
 }
572 572
 
573
+// cleanupDeletedDevices retries deletion of every device in devices.Devices whose Deleted flag is set. It assumes all devices have already been
574
+// loaded in the hash table. Should be called with devices.Lock() held; the lock is dropped around the DeleteDevice() calls
575
+// and re-acquired (via defer) before returning. Individual deletion failures are only logged as warnings; nil is always returned.
576
+func (devices *DeviceSet) cleanupDeletedDevices() error {
577
+	var deletedDevices []*devInfo
578
+
579
+	for _, info := range devices.Devices {
580
+		if !info.Deleted {
581
+			continue
582
+		}
583
+		logrus.Debugf("devmapper: Found deleted device %s.", info.Hash)
584
+		deletedDevices = append(deletedDevices, info)
585
+	}
586
+
587
+	// Delete the deleted devices. DeleteDevice() first takes the info lock
588
+	// and then devices.Lock(). So drop devices.Lock() here to avoid deadlock.
589
+	devices.Unlock()
590
+	defer devices.Lock()
591
+
592
+	for _, info := range deletedDevices {
593
+		// syncDelete=false, so deferred deletion may be attempted again if the device cannot be deleted now.
594
+		if err := devices.DeleteDevice(info.Hash, false); err != nil {
595
+			logrus.Warnf("devmapper: Deletion of device %s, device_id=%v failed:%v", info.Hash, info.DeviceID, err)
596
+		}
597
+	}
598
+
599
+	return nil
600
+}
601
+
573 602
 func (devices *DeviceSet) initMetaData() error {
574 603
 	devices.Lock()
575 604
 	defer devices.Unlock()
... ...
@@ -594,6 +630,11 @@ func (devices *DeviceSet) initMetaData() error {
594 594
 	if err := devices.processPendingTransaction(); err != nil {
595 595
 		return err
596 596
 	}
597
+
598
+	if err := devices.cleanupDeletedDevices(); err != nil {
599
+		return err
600
+	}
601
+
597 602
 	return nil
598 603
 }
599 604
 
... ...
@@ -758,7 +799,7 @@ func (devices *DeviceSet) verifyBaseDeviceUUID(baseInfo *devInfo) error {
758 758
 	devices.Lock()
759 759
 	defer devices.Unlock()
760 760
 
761
-	if err := devices.activateDeviceIfNeeded(baseInfo); err != nil {
761
+	if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil {
762 762
 		return err
763 763
 	}
764 764
 
... ...
@@ -780,7 +821,7 @@ func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error {
780 780
 	devices.Lock()
781 781
 	defer devices.Unlock()
782 782
 
783
-	if err := devices.activateDeviceIfNeeded(baseInfo); err != nil {
783
+	if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil {
784 784
 		return err
785 785
 	}
786 786
 
... ...
@@ -807,7 +848,7 @@ func (devices *DeviceSet) createBaseImage() error {
807 807
 
808 808
 	logrus.Debugf("Creating filesystem on base device-mapper thin volume")
809 809
 
810
-	if err := devices.activateDeviceIfNeeded(info); err != nil {
810
+	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
811 811
 		return err
812 812
 	}
813 813
 
... ...
@@ -870,7 +911,7 @@ func (devices *DeviceSet) setupBaseImage() error {
870 870
 	// fresh.
871 871
 
872 872
 	if oldInfo != nil {
873
-		if oldInfo.Initialized {
873
+		if oldInfo.Initialized && !oldInfo.Deleted {
874 874
 			if err := devices.setupVerifyBaseImageUUID(oldInfo); err != nil {
875 875
 				return err
876 876
 			}
... ...
@@ -879,7 +920,10 @@ func (devices *DeviceSet) setupBaseImage() error {
879 879
 		}
880 880
 
881 881
 		logrus.Debugf("Removing uninitialized base image")
882
-		if err := devices.DeleteDevice(""); err != nil {
882
+		// If previous base device is in deferred delete state,
883
+		// that needs to be cleaned up first. So don't try
884
+		// deferred deletion.
885
+		if err := devices.DeleteDevice("", true); err != nil {
883 886
 			return err
884 887
 		}
885 888
 	}
... ...
@@ -1513,19 +1557,26 @@ func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
1513 1513
 	logrus.Debugf("[deviceset] AddDevice(hash=%s basehash=%s)", hash, baseHash)
1514 1514
 	defer logrus.Debugf("[deviceset] AddDevice(hash=%s basehash=%s) END", hash, baseHash)
1515 1515
 
1516
+	// If a deleted device exists, return error.
1516 1517
 	baseInfo, err := devices.lookupDeviceWithLock(baseHash)
1517 1518
 	if err != nil {
1518 1519
 		return err
1519 1520
 	}
1520 1521
 
1522
+	if baseInfo.Deleted {
1523
+		return fmt.Errorf("devmapper: Base device %v has been marked for deferred deletion", baseInfo.Hash)
1524
+	}
1525
+
1521 1526
 	baseInfo.lock.Lock()
1522 1527
 	defer baseInfo.lock.Unlock()
1523 1528
 
1524 1529
 	devices.Lock()
1525 1530
 	defer devices.Unlock()
1526 1531
 
1532
+	// Also include deleted devices in case hash of new device is
1533
+	// same as one of the deleted devices.
1527 1534
 	if info, _ := devices.lookupDevice(hash); info != nil {
1528
-		return fmt.Errorf("device %s already exists", hash)
1535
+		return fmt.Errorf("device %s already exists. Deleted=%v", hash, info.Deleted)
1529 1536
 	}
1530 1537
 
1531 1538
 	if err := devices.createRegisterSnapDevice(hash, baseInfo); err != nil {
... ...
@@ -1535,8 +1586,26 @@ func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
1535 1535
 	return nil
1536 1536
 }
1537 1537
 
1538
+func (devices *DeviceSet) markForDeferredDeletion(info *devInfo) error {
1539
+	// If the device is already marked as deleted, there is nothing to be done.
1540
+	if info.Deleted {
1541
+		return nil
1542
+	}
1543
+
1544
+	logrus.Debugf("devmapper: Marking device %s for deferred deletion.", info.Hash)
1545
+
1546
+	info.Deleted = true
1547
+
1548
+	// Save device metadata to reflect the deleted state; if saving fails, reset the in-memory flag and return the error.
1549
+	if err := devices.saveMetadata(info); err != nil {
1550
+		info.Deleted = false
1551
+		return err
1552
+	}
1553
+	return nil
1554
+}
1555
+
1538 1556
 // Should be called with devices.Lock() held.
1539
-func (devices *DeviceSet) deleteTransaction(info *devInfo) error {
1557
+func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) error {
1540 1558
 	if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil {
1541 1559
 		logrus.Debugf("Error opening transaction hash = %s deviceId = %d", "", info.DeviceID)
1542 1560
 		return err
... ...
@@ -1544,13 +1613,25 @@ func (devices *DeviceSet) deleteTransaction(info *devInfo) error {
1544 1544
 
1545 1545
 	defer devices.closeTransaction()
1546 1546
 
1547
-	if err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID); err != nil {
1548
-		logrus.Debugf("Error deleting device: %s", err)
1549
-		return err
1547
+	err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID)
1548
+	if err != nil {
1549
+		// If syncDelete is true, we want to return error. If deferred
1550
+		// deletion is not enabled, we return an error. If error is
1551
+		// something other than EBUSY, return an error.
1552
+		if syncDelete || !devices.deferredDelete || err != devicemapper.ErrBusy {
1553
+			logrus.Debugf("Error deleting device: %s", err)
1554
+			return err
1555
+		}
1550 1556
 	}
1551 1557
 
1552
-	if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil {
1553
-		return err
1558
+	if err == nil {
1559
+		if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil {
1560
+			return err
1561
+		}
1562
+	} else {
1563
+		if err := devices.markForDeferredDeletion(info); err != nil {
1564
+			return err
1565
+		}
1554 1566
 	}
1555 1567
 
1556 1568
 	return nil
... ...
@@ -1562,8 +1643,10 @@ func (devices *DeviceSet) issueDiscard(info *devInfo) error {
1562 1562
 	defer logrus.Debugf("devmapper: issueDiscard(device: %s). END", info.Hash)
1563 1563
 	// This is a workaround for the kernel not discarding block so
1564 1564
 	// on the thin pool when we remove a thinp device, so we do it
1565
-	// manually
1566
-	if err := devices.activateDeviceIfNeeded(info); err != nil {
1565
+	// manually.
1566
+	// Even if device is deferred deleted, activate it and issue
1567
+	// discards.
1568
+	if err := devices.activateDeviceIfNeeded(info, true); err != nil {
1567 1569
 		return err
1568 1570
 	}
1569 1571
 
... ...
@@ -1584,7 +1667,7 @@ func (devices *DeviceSet) issueDiscard(info *devInfo) error {
1584 1584
 }
1585 1585
 
1586 1586
 // Should be called with devices.Lock() held.
1587
-func (devices *DeviceSet) deleteDevice(info *devInfo) error {
1587
+func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error {
1588 1588
 	if devices.doBlkDiscard {
1589 1589
 		devices.issueDiscard(info)
1590 1590
 	}
... ...
@@ -1595,7 +1678,7 @@ func (devices *DeviceSet) deleteDevice(info *devInfo) error {
1595 1595
 		return err
1596 1596
 	}
1597 1597
 
1598
-	if err := devices.deleteTransaction(info); err != nil {
1598
+	if err := devices.deleteTransaction(info, syncDelete); err != nil {
1599 1599
 		return err
1600 1600
 	}
1601 1601
 
... ...
@@ -1604,8 +1687,12 @@ func (devices *DeviceSet) deleteDevice(info *devInfo) error {
1604 1604
 	return nil
1605 1605
 }
1606 1606
 
1607
-// DeleteDevice deletes a device from the hash.
1608
-func (devices *DeviceSet) DeleteDevice(hash string) error {
1607
+// DeleteDevice will return success if device has been marked for deferred
1608
+// removal. If one wants to override that and want DeleteDevice() to fail if
1609
+// device was busy and could not be deleted, set syncDelete=true.
1610
+func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error {
1611
+	logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) START", hash, syncDelete)
1612
+	defer logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) END", hash, syncDelete)
1609 1613
 	info, err := devices.lookupDeviceWithLock(hash)
1610 1614
 	if err != nil {
1611 1615
 		return err
... ...
@@ -1624,7 +1711,7 @@ func (devices *DeviceSet) DeleteDevice(hash string) error {
1624 1624
 		return fmt.Errorf("devmapper: Can't delete device %v as it is still mounted. mntCount=%v", info.Hash, info.mountCount)
1625 1625
 	}
1626 1626
 
1627
-	return devices.deleteDevice(info)
1627
+	return devices.deleteDevice(info, syncDelete)
1628 1628
 }
1629 1629
 
1630 1630
 func (devices *DeviceSet) deactivatePool() error {
... ...
@@ -1811,6 +1898,10 @@ func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
1811 1811
 		return err
1812 1812
 	}
1813 1813
 
1814
+	if info.Deleted {
1815
+		return fmt.Errorf("devmapper: Can't mount device %v as it has been marked for deferred deletion", info.Hash)
1816
+	}
1817
+
1814 1818
 	info.lock.Lock()
1815 1819
 	defer info.lock.Unlock()
1816 1820
 
... ...
@@ -1826,7 +1917,7 @@ func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
1826 1826
 		return nil
1827 1827
 	}
1828 1828
 
1829
-	if err := devices.activateDeviceIfNeeded(info); err != nil {
1829
+	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
1830 1830
 		return fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
1831 1831
 	}
1832 1832
 
... ...
@@ -1946,7 +2037,7 @@ func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
1946 1946
 		TransactionID: info.TransactionID,
1947 1947
 	}
1948 1948
 
1949
-	if err := devices.activateDeviceIfNeeded(info); err != nil {
1949
+	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
1950 1950
 		return nil, fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
1951 1951
 	}
1952 1952
 
... ...
@@ -143,7 +143,7 @@ func (d *Driver) Remove(id string) error {
143 143
 	}
144 144
 
145 145
 	// This assumes the device has been properly Get/Put:ed and thus is unmounted
146
-	if err := d.DeviceSet.DeleteDevice(id); err != nil {
146
+	if err := d.DeviceSet.DeleteDevice(id, false); err != nil {
147 147
 		return err
148 148
 	}
149 149
 
... ...
@@ -750,7 +750,11 @@ func DeleteDevice(poolName string, deviceID int) error {
750 750
 		return fmt.Errorf("Can't set message %s", err)
751 751
 	}
752 752
 
753
+	dmSawBusy = false
753 754
 	if err := task.run(); err != nil {
755
+		if dmSawBusy {
756
+			return ErrBusy
757
+		}
754 758
 		return fmt.Errorf("Error running DeleteDevice %s", err)
755 759
 	}
756 760
 	return nil