full diff: https://github.com/docker/swarmkit/compare/062b694b46c0744d601eebef79f3f7433d808a04...0b8364e7d08aa0e972241eb59ae981a67a587a0e
- Fix leaking tasks.db
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
| ... | ... |
@@ -128,7 +128,7 @@ github.com/containerd/ttrpc 92c8520ef9f86600c650dd540266 |
| 128 | 128 |
github.com/gogo/googleapis d31c731455cb061f42baff3bda55bad0118b126b # v1.2.0 |
| 129 | 129 |
|
| 130 | 130 |
# cluster |
| 131 |
-github.com/docker/swarmkit 062b694b46c0744d601eebef79f3f7433d808a04 # bump_v19.03 branch |
|
| 131 |
+github.com/docker/swarmkit 0b8364e7d08aa0e972241eb59ae981a67a587a0e # bump_v19.03 branch |
|
| 132 | 132 |
github.com/gogo/protobuf ba06b47c162d49f2af050fb4c75bcbc86a159d5c # v1.2.1 |
| 133 | 133 |
github.com/golang/protobuf aa810b61a9c79d51363740d207bb46cf8e620ed5 # v1.2.0 |
| 134 | 134 |
github.com/cloudflare/cfssl 5d63dbd981b5c408effbb58c442d54761ff94fbd # 1.3.2 |
| ... | ... |
@@ -131,7 +131,9 @@ func PutTask(tx *bolt.Tx, task *api.Task) error {
|
| 131 | 131 |
|
| 132 | 132 |
// PutTaskStatus updates the status for the task with id. |
| 133 | 133 |
func PutTaskStatus(tx *bolt.Tx, id string, status *api.TaskStatus) error {
|
| 134 |
- return withCreateTaskBucketIfNotExists(tx, id, func(bkt *bolt.Bucket) error {
|
|
| 134 |
+ // this used to be withCreateTaskBucketIfNotExists, but that could lead |
|
| 135 |
+ // to weird race conditions, and was not necessary. |
|
| 136 |
+ return withTaskBucket(tx, id, func(bkt *bolt.Bucket) error {
|
|
| 135 | 137 |
p, err := proto.Marshal(status) |
| 136 | 138 |
if err != nil {
|
| 137 | 139 |
return err |
| ... | ... |
@@ -278,10 +278,15 @@ func reconcileTaskState(ctx context.Context, w *worker, assignments []*api.Assig |
| 278 | 278 |
|
| 279 | 279 |
removeTaskAssignment := func(taskID string) error {
|
| 280 | 280 |
ctx := log.WithLogger(ctx, log.G(ctx).WithField("task.id", taskID))
|
| 281 |
- if err := SetTaskAssignment(tx, taskID, false); err != nil {
|
|
| 282 |
- log.G(ctx).WithError(err).Error("error setting task assignment in database")
|
|
| 281 |
+ // if a task is no longer assigned, then we do not have to keep track |
|
| 282 |
+ // of it. a task will only be unassigned when it is deleted on the |
|
| 283 |
+ // manager. instead of SetTaskAssginment to true, we'll just remove the |
|
| 284 |
+ // task now. |
|
| 285 |
+ if err := DeleteTask(tx, taskID); err != nil {
|
|
| 286 |
+ log.G(ctx).WithError(err).Error("error removing de-assigned task")
|
|
| 287 |
+ return err |
|
| 283 | 288 |
} |
| 284 |
- return err |
|
| 289 |
+ return nil |
|
| 285 | 290 |
} |
| 286 | 291 |
|
| 287 | 292 |
// If this was a complete set of assignments, we're going to remove all the remaining |
| ... | ... |
@@ -500,6 +505,21 @@ func (w *worker) newTaskManager(ctx context.Context, tx *bolt.Tx, task *api.Task |
| 500 | 500 |
// updateTaskStatus reports statuses to listeners, read lock must be held. |
| 501 | 501 |
func (w *worker) updateTaskStatus(ctx context.Context, tx *bolt.Tx, taskID string, status *api.TaskStatus) error {
|
| 502 | 502 |
if err := PutTaskStatus(tx, taskID, status); err != nil {
|
| 503 |
+ // we shouldn't fail to put a task status. however, there exists the |
|
| 504 |
+ // possibility of a race in which we try to put a task status after the |
|
| 505 |
+ // task has been deleted. because this whole contraption is a careful |
|
| 506 |
+ // dance of too-tightly-coupled concurrent parts, fixing tht race is |
|
| 507 |
+ // fraught with hazards. instead, we'll recognize that it can occur, |
|
| 508 |
+ // log the error, and then ignore it. |
|
| 509 |
+ if err == errTaskUnknown {
|
|
| 510 |
+ // log at info level. debug logging in docker is already really |
|
| 511 |
+ // verbose, so many people disable it. the race that causes this |
|
| 512 |
+ // behavior should be very rare, but if it occurs, we should know |
|
| 513 |
+ // about it, because if there is some case where it is _not_ rare, |
|
| 514 |
+ // then knowing about it will go a long way toward debugging. |
|
| 515 |
+ log.G(ctx).Info("attempted to update status for a task that has been removed")
|
|
| 516 |
+ return nil |
|
| 517 |
+ } |
|
| 503 | 518 |
log.G(ctx).WithError(err).Error("failed writing status to disk")
|
| 504 | 519 |
return err |
| 505 | 520 |
} |