Browse code

Added support for Network diagnostics

Ravi Sankar Penta authored on 2016/10/04 05:55:08
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,167 @@
0
+package network
1
+
2
+import (
3
+	"errors"
4
+	"fmt"
5
+	"strconv"
6
+	"strings"
7
+
8
+	flag "github.com/spf13/pflag"
9
+
10
+	kapi "k8s.io/kubernetes/pkg/api"
11
+	kclient "k8s.io/kubernetes/pkg/client/unversioned"
12
+
13
+	osclient "github.com/openshift/origin/pkg/client"
14
+	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
15
+	"github.com/openshift/origin/pkg/diagnostics/networkpod/util"
16
+	"github.com/openshift/origin/pkg/diagnostics/types"
17
+)
18
+
19
// NetworkDiagnosticName is the name returned by NetworkDiagnostic.Name and
// used to label the DiagnosticResult produced by NetworkDiagnostic.Check.
const NetworkDiagnosticName = "NetworkCheck"
22
+
23
+// NetworkDiagnostic is a diagnostic that runs a network diagnostic pod and relays the results.
24
+type NetworkDiagnostic struct {
25
+	KubeClient          *kclient.Client
26
+	OSClient            *osclient.Client
27
+	ClientFlags         *flag.FlagSet
28
+	Level               int
29
+	Factory             *osclientcmd.Factory
30
+	PreventModification bool
31
+	LogDir              string
32
+
33
+	pluginName   string
34
+	nodes        []kapi.Node
35
+	nsName       string
36
+	globalnsName string
37
+	res          types.DiagnosticResult
38
+}
39
+
40
+// Name is part of the Diagnostic interface and just returns name.
41
+func (d *NetworkDiagnostic) Name() string {
42
+	return NetworkDiagnosticName
43
+}
44
+
45
+// Description is part of the Diagnostic interface and provides a user-focused description of what the diagnostic does.
46
+func (d *NetworkDiagnostic) Description() string {
47
+	return "Create a pod on all schedulable nodes and run network diagnostics from the application standpoint"
48
+}
49
+
50
+// CanRun is part of the Diagnostic interface; it determines if the conditions are right to run this diagnostic.
51
+func (d *NetworkDiagnostic) CanRun() (bool, error) {
52
+	if d.PreventModification {
53
+		return false, errors.New("running the network diagnostic pod is an API change, which is prevented as you indicated")
54
+	} else if d.KubeClient == nil {
55
+		return false, errors.New("must have kube client")
56
+	} else if d.OSClient == nil {
57
+		return false, errors.New("must have openshift client")
58
+	} else if _, err := d.getKubeConfig(); err != nil {
59
+		return false, err
60
+	}
61
+	return true, nil
62
+}
63
+
64
+// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
65
+func (d *NetworkDiagnostic) Check() types.DiagnosticResult {
66
+	d.res = types.NewDiagnosticResult(NetworkDiagnosticName)
67
+
68
+	var err error
69
+	var ok bool
70
+	d.pluginName, ok, err = util.GetOpenShiftNetworkPlugin(d.OSClient)
71
+	if err != nil {
72
+		d.res.Error("DNet2001", err, fmt.Sprintf("Checking network plugin failed. Error: %s", err))
73
+		return d.res
74
+	}
75
+	if !ok {
76
+		d.res.Warn("DNet2002", nil, fmt.Sprintf("Skipping network diagnostics check. Reason: Not using openshift network plugin."))
77
+		return d.res
78
+	}
79
+
80
+	d.nodes, err = util.GetSchedulableNodes(d.KubeClient)
81
+	if err != nil {
82
+		d.res.Error("DNet2003", err, fmt.Sprintf("Fetching schedulable nodes failed. Error: %s", err))
83
+		return d.res
84
+	}
85
+	if len(d.nodes) == 0 {
86
+		d.res.Warn("DNet2004", nil, fmt.Sprint("Skipping network checks. Reason: No schedulable/ready nodes found."))
87
+		return d.res
88
+	}
89
+
90
+	if len(d.LogDir) == 0 {
91
+		d.LogDir = util.NetworkDiagDefaultLogDir
92
+	}
93
+	d.runNetworkDiagnostic()
94
+	return d.res
95
+}
96
+
97
+func (d *NetworkDiagnostic) runNetworkDiagnostic() {
98
+	// Setup test environment
99
+	if err := d.TestSetup(); err != nil {
100
+		d.res.Error("DNet2005", err, fmt.Sprintf("Setting up test environment for network diagnostics failed: %v", err))
101
+		return
102
+	}
103
+	defer func() {
104
+		d.Cleanup()
105
+	}()
106
+
107
+	// Need to show summary at least
108
+	loglevel := d.Level
109
+	if loglevel > 2 {
110
+		loglevel = 2
111
+	}
112
+
113
+	// TEST Phase: Run network diagnostic pod on all valid nodes in parallel
114
+	command := []string{"chroot", util.NetworkDiagContainerMountPath, "openshift", "infra", "network-diagnostic-pod", "-l", strconv.Itoa(loglevel)}
115
+	if err := d.runNetworkPod(command); err != nil {
116
+		d.res.Error("DNet2006", err, err.Error())
117
+		return
118
+	}
119
+	// Wait for network diagnostic pod completion
120
+	if err := d.waitForNetworkPod(d.nsName, util.NetworkDiagPodNamePrefix, []kapi.PodPhase{kapi.PodSucceeded, kapi.PodFailed}); err != nil {
121
+		d.res.Error("DNet2007", err, err.Error())
122
+		return
123
+	}
124
+	// Gather logs from network diagnostic pod on all valid nodes
125
+	diagsFailed := false
126
+	if err := d.CollectNetworkPodLogs(); err != nil {
127
+		d.res.Error("DNet2008", err, err.Error())
128
+		diagsFailed = true
129
+	}
130
+
131
+	// Collection Phase: Run network diagnostic pod on all valid nodes
132
+	command = []string{"chroot", util.NetworkDiagContainerMountPath, "sleep", "1000"}
133
+	if err := d.runNetworkPod(command); err != nil {
134
+		d.res.Error("DNet2009", err, err.Error())
135
+		return
136
+	}
137
+
138
+	// Wait for network diagnostic pod to start
139
+	if err := d.waitForNetworkPod(d.nsName, util.NetworkDiagPodNamePrefix, []kapi.PodPhase{kapi.PodRunning, kapi.PodFailed, kapi.PodSucceeded}); err != nil {
140
+		d.res.Error("DNet2010", err, err.Error())
141
+		// Do not bail out here, collect what ever info is available from all valid nodes
142
+	}
143
+
144
+	if err := d.CollectNetworkInfo(diagsFailed); err != nil {
145
+		d.res.Error("DNet2011", err, err.Error())
146
+	}
147
+
148
+	if diagsFailed {
149
+		d.res.Info("DNet2012", fmt.Sprintf("Additional info collected under %q for further analysis", d.LogDir))
150
+	}
151
+	return
152
+}
153
+
154
+func (d *NetworkDiagnostic) runNetworkPod(command []string) error {
155
+	for _, node := range d.nodes {
156
+		podName := kapi.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", util.NetworkDiagPodNamePrefix))
157
+
158
+		pod := GetNetworkDiagnosticsPod(command, podName, node.Name)
159
+		_, err := d.KubeClient.Pods(d.nsName).Create(pod)
160
+		if err != nil {
161
+			return fmt.Errorf("Creating network diagnostic pod %q on node %q with command %q failed: %v", podName, node.Name, strings.Join(command, " "), err)
162
+		}
163
+		d.res.Debug("DNet2013", fmt.Sprintf("Created network diagnostic pod %q on node %q with command: %q", podName, node.Name, strings.Join(command, " ")))
164
+	}
165
+	return nil
166
+}