Browse code

Don’t try to restore swarm from incomplete state

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>

Tonis Tiigi authored on 2016/06/19 11:43:47
Showing 2 changed files
... ...
@@ -93,7 +93,7 @@ func New(config Config) (*Cluster, error) {
93 93
 		reconnectDelay: initialReconnectDelay,
94 94
 	}
95 95
 
96
-	dt, err := ioutil.ReadFile(filepath.Join(root, stateFile))
96
+	st, err := c.loadState()
97 97
 	if err != nil {
98 98
 		if os.IsNotExist(err) {
99 99
 			return c, nil
... ...
@@ -101,11 +101,6 @@ func New(config Config) (*Cluster, error) {
101 101
 		return nil, err
102 102
 	}
103 103
 
104
-	var st state
105
-	if err := json.Unmarshal(dt, &st); err != nil {
106
-		return nil, err
107
-	}
108
-
109 104
 	n, ctx, err := c.startNewNode(false, st.ListenAddr, "", "", "", false)
110 105
 	if err != nil {
111 106
 		return nil, err
... ...
@@ -124,6 +119,25 @@ func New(config Config) (*Cluster, error) {
124 124
 	return c, nil
125 125
 }
126 126
 
127
+func (c *Cluster) loadState() (*state, error) {
128
+	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
129
+	if err != nil {
130
+		return nil, err
131
+	}
132
+	// missing certificate means no actual state to restore from
133
+	if _, err := os.Stat(filepath.Join(c.root, "certificates/swarm-node.crt")); err != nil {
134
+		if os.IsNotExist(err) {
135
+			c.clearState()
136
+		}
137
+		return nil, err
138
+	}
139
+	var st state
140
+	if err := json.Unmarshal(dt, &st); err != nil {
141
+		return nil, err
142
+	}
143
+	return &st, nil
144
+}
145
+
127 146
 func (c *Cluster) saveState() error {
128 147
 	dt, err := json.Marshal(state{ListenAddr: c.listenAddr})
129 148
 	if err != nil {
... ...
@@ -410,6 +424,7 @@ func (c *Cluster) Leave(force bool) error {
410 410
 }
411 411
 
412 412
 func (c *Cluster) clearState() error {
413
+	// TODO: back up this data instead of removing it?
413 414
 	if err := os.RemoveAll(c.root); err != nil {
414 415
 		return err
415 416
 	}
... ...
@@ -578,6 +578,31 @@ func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) {
578 578
 	c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
579 579
 }
580 580
 
581
+// #23705
582
+func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) {
583
+	d := s.AddDaemon(c, false, false)
584
+	go d.Join("nosuchhost:1234", "", "", false) // will block on pending state
585
+
586
+	for i := 0; ; i++ {
587
+		info, err := d.info()
588
+		c.Assert(err, checker.IsNil)
589
+		if info.LocalNodeState == swarm.LocalNodeStatePending {
590
+			break
591
+		}
592
+		if i > 10 {
593
+			c.Fatalf("node did not go to pending state: %v", info.LocalNodeState)
594
+		}
595
+		time.Sleep(100 * time.Millisecond)
596
+	}
597
+
598
+	c.Assert(d.Stop(), checker.IsNil)
599
+	c.Assert(d.Start(), checker.IsNil)
600
+
601
+	info, err := d.info()
602
+	c.Assert(err, checker.IsNil)
603
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
604
+}
605
+
581 606
 func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
582 607
 	d1 := s.AddDaemon(c, true, true)
583 608