
[origin-aggregated-logging 207] Add diagnostics for aggregated logging

Jeff Cantrill authored on 2016/08/23 06:13:02
Showing 27 changed files
... ...
@@ -4,6 +4,7 @@
4 4
 /.project
5 5
 /.vagrant
6 6
 /.vscode
7
+/.settings
7 8
 /cpu.pprof
8 9
 /assets/app/config.local.js
9 10
 /assets/nbproject
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+oadm diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+oc adm diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+openshift admin diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+openshift cli adm diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+openshift ex diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -14,13 +14,24 @@ import (
14 14
 	"github.com/openshift/origin/pkg/client"
15 15
 	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
16 16
 	clustdiags "github.com/openshift/origin/pkg/diagnostics/cluster"
17
+	agldiags "github.com/openshift/origin/pkg/diagnostics/cluster/aggregated_logging"
17 18
 	"github.com/openshift/origin/pkg/diagnostics/types"
18 19
 )
19 20
 
20 21
 var (
21 22
 	// availableClusterDiagnostics contains the names of cluster diagnostics that can be executed
22 23
 	// during a single run of diagnostics. Add more diagnostics to the list as they are defined.
23
-	availableClusterDiagnostics = sets.NewString(clustdiags.NodeDefinitionsName, clustdiags.ClusterRegistryName, clustdiags.ClusterRouterName, clustdiags.ClusterRolesName, clustdiags.ClusterRoleBindingsName, clustdiags.MasterNodeName, clustdiags.MetricsApiProxyName, clustdiags.ServiceExternalIPsName)
24
+	availableClusterDiagnostics = sets.NewString(
25
+		agldiags.AggregatedLoggingName,
26
+		clustdiags.ClusterRegistryName,
27
+		clustdiags.ClusterRouterName,
28
+		clustdiags.ClusterRolesName,
29
+		clustdiags.ClusterRoleBindingsName,
30
+		clustdiags.MasterNodeName,
31
+		clustdiags.MetricsApiProxyName,
32
+		clustdiags.NodeDefinitionsName,
33
+		clustdiags.ServiceExternalIPsName,
34
+	)
24 35
 )
25 36
 
26 37
 // buildClusterDiagnostics builds cluster Diagnostic objects if a cluster-admin client can be extracted from the rawConfig passed in.
... ...
@@ -46,6 +57,8 @@ func (o DiagnosticsOptions) buildClusterDiagnostics(rawConfig *clientcmdapi.Conf
46 46
 	for _, diagnosticName := range requestedDiagnostics {
47 47
 		var d types.Diagnostic
48 48
 		switch diagnosticName {
49
+		case agldiags.AggregatedLoggingName:
50
+			d = agldiags.NewAggregatedLogging(o.MasterConfigLocation, kclusterClient, clusterClient)
49 51
 		case clustdiags.NodeDefinitionsName:
50 52
 			d = &clustdiags.NodeDefinitions{KubeClient: kclusterClient, OsClient: clusterClient}
51 53
 		case clustdiags.MasterNodeName:
... ...
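The two hunks above show the full pattern for wiring a new cluster diagnostic into the command: register its name in availableClusterDiagnostics, then add a case to the switch that constructs it. Below is a minimal, self-contained sketch of that same pattern; the MyDiagnostic name and type are invented for illustration and are not part of this change:

  package main

  import (
      "fmt"

      "k8s.io/kubernetes/pkg/util/sets"
  )

  // diagnostic stands in for types.Diagnostic in this sketch.
  type diagnostic interface {
      Name() string
  }

  type myDiagnostic struct{}

  func (myDiagnostic) Name() string { return "MyDiagnostic" }

  // Step 1: register the diagnostic name so it can be requested by name.
  var availableClusterDiagnostics = sets.NewString("MyDiagnostic")

  // Step 2: build the diagnostic when its name is requested.
  func buildDiagnostic(name string) (diagnostic, error) {
      if !availableClusterDiagnostics.Has(name) {
          return nil, fmt.Errorf("unknown diagnostic: %s", name)
      }
      switch name {
      case "MyDiagnostic":
          return myDiagnostic{}, nil
      }
      return nil, fmt.Errorf("no builder registered for: %s", name)
  }

  func main() {
      d, err := buildDiagnostic("MyDiagnostic")
      if err != nil {
          fmt.Println(err)
          return
      }
      fmt.Println("built diagnostic:", d.Name())
  }
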
@@ -80,6 +80,9 @@ you will receive an error if they are not found. For example:
80 80
   are skipped.
81 81
 
82 82
 Diagnostics may be run individually by passing diagnostic names as arguments.
83
+
84
+    %[1]s <DiagnosticName>
85
+    
83 86
 The available diagnostic names are:
84 87
 %[2]s
85 88
 `
... ...
@@ -95,7 +95,7 @@ save may be your own.
95 95
 
96 96
 A diagnostic is an object that conforms to the Diagnostic interface
97 97
 (see pkg/diagnostics/types/diagnostic.go). The diagnostic object should
98
-be built in one of the builders in the pkg/cmd/experimental/diagnostics
98
+be built in one of the builders in the pkg/cmd/admin/diagnostics
99 99
 package (based on whether it depends on client, cluster-admin, or host
100 100
 configuration). When executed, the diagnostic logs its findings into
101 101
 a result object. It should be assumed that they may run in parallel.
102 102
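As a minimal sketch of that contract, assuming the Diagnostic interface consists of the methods AggregatedLogging implements later in this change (Name, Description, CanRun, Check) and that types.NewDiagnosticResult constructs the result object; the ExampleDiagnostic name is hypothetical:

  package example

  import "github.com/openshift/origin/pkg/diagnostics/types"

  // ExampleDiagnostic is a hypothetical, no-op diagnostic used only to
  // illustrate the Diagnostic contract described above.
  type ExampleDiagnostic struct {
      result types.DiagnosticResult
  }

  func NewExampleDiagnostic() *ExampleDiagnostic {
      return &ExampleDiagnostic{result: types.NewDiagnosticResult("Example")}
  }

  func (d *ExampleDiagnostic) Name() string        { return "Example" }
  func (d *ExampleDiagnostic) Description() string { return "A no-op example diagnostic" }

  // CanRun reports whether prerequisites (clients, config files) are available.
  func (d *ExampleDiagnostic) CanRun() (bool, error) { return true, nil }

  // Check logs findings into the result object and returns it; diagnostics
  // should assume they may run in parallel.
  func (d *ExampleDiagnostic) Check() types.DiagnosticResult {
      d.result.Info("EX0001", "Nothing to check; reporting success.")
      return d.result
  }
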
new file mode 100644
... ...
@@ -0,0 +1,41 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+
5
+	"k8s.io/kubernetes/pkg/apis/rbac"
6
+	"k8s.io/kubernetes/pkg/util/sets"
7
+)
8
+
9
+const clusterReaderRoleBindingName = "cluster-readers"
10
+
11
+var clusterReaderRoleBindingNames = sets.NewString(fluentdServiceAccountName)
12
+
13
+const clusterReaderUnboundServiceAccount = `
14
+The ServiceAccount '%[1]s' is not a cluster-reader in the '%[2]s' project.  This
15
+is required to enable Fluentd to look up pod metadata for the logs it gathers.
16
+As a user with a cluster-admin role, you can grant the permissions by running
17
+the following:
18
+
19
+  oadm policy add-cluster-role-to-user cluster-reader system:serviceaccount:%[2]s:%[1]s
20
+`
21
+
22
+func checkClusterRoleBindings(r diagnosticReporter, adapter clusterRoleBindingsAdapter, project string) {
23
+	r.Debug("AGL0600", "Checking ClusterRoleBindings...")
24
+	crb, err := adapter.getClusterRoleBinding(clusterReaderRoleBindingName)
25
+	if err != nil {
26
+		r.Error("AGL0605", err, fmt.Sprintf("There was an error while trying to retrieve the ClusterRoleBindings for the logging stack: %s", err))
27
+		return
28
+	}
29
+	boundServiceAccounts := sets.NewString()
30
+	for _, subject := range crb.Subjects {
31
+		if subject.Kind == rbac.ServiceAccountKind && subject.Namespace == project {
32
+			boundServiceAccounts.Insert(subject.Name)
33
+		}
34
+	}
35
+	for _, name := range clusterReaderRoleBindingNames.List() {
36
+		if !boundServiceAccounts.Has(name) {
37
+			r.Error("AGL0610", nil, fmt.Sprintf(clusterReaderUnboundServiceAccount, name, project))
38
+		}
39
+	}
40
+}
0 41
new file mode 100644
... ...
@@ -0,0 +1,70 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/apis/rbac"
8
+
9
+	authapi "github.com/openshift/origin/pkg/authorization/api"
10
+	"github.com/openshift/origin/pkg/diagnostics/log"
11
+)
12
+
13
+type fakeRoleBindingDiagnostic struct {
14
+	fakeDiagnostic
15
+	fakeClusterRoleBinding authapi.ClusterRoleBinding
16
+}
17
+
18
+func newFakeRoleBindingDiagnostic(t *testing.T) *fakeRoleBindingDiagnostic {
19
+	return &fakeRoleBindingDiagnostic{
20
+		fakeDiagnostic: *newFakeDiagnostic(t),
21
+	}
22
+}
23
+
24
+func (f *fakeRoleBindingDiagnostic) getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) {
25
+	if f.err != nil {
26
+		return nil, f.err
27
+	}
28
+	return &f.fakeClusterRoleBinding, nil
29
+}
30
+func (f *fakeRoleBindingDiagnostic) addBinding(name string, namespace string) {
31
+	ref := kapi.ObjectReference{
32
+		Name:      name,
33
+		Kind:      rbac.ServiceAccountKind,
34
+		Namespace: namespace,
35
+	}
36
+	f.fakeClusterRoleBinding.Subjects = append(f.fakeClusterRoleBinding.Subjects, ref)
37
+}
38
+
39
+//test an error is reported when the client errors while retrieving ClusterRoleBindings
40
+func TestCheckClusterRoleBindingsWhenErrorFromClientRetrievingRoles(t *testing.T) {
41
+	d := newFakeRoleBindingDiagnostic(t)
42
+	d.err = errors.New("client error")
43
+
44
+	checkClusterRoleBindings(d, d, fakeProject)
45
+
46
+	d.assertMessage("AGL0605", "Exp. an error message if client error retrieving ClusterRoleBindings", log.ErrorLevel)
47
+	d.dumpMessages()
48
+}
49
+
50
+func TestCheckClusterRoleBindingsWhenClusterReaderIsNotInProject(t *testing.T) {
51
+	d := newFakeRoleBindingDiagnostic(t)
52
+	d.addBinding("someName", "someRandomProject")
53
+	d.addBinding(fluentdServiceAccountName, fakeProject)
54
+
55
+	checkClusterRoleBindings(d, d, fakeProject)
56
+
57
+	d.assertNoErrors()
58
+	d.dumpMessages()
59
+}
60
+
61
+func TestCheckClusterRoleBindingsWhenUnboundServiceAccounts(t *testing.T) {
62
+	d := newFakeRoleBindingDiagnostic(t)
63
+	d.addBinding(fluentdServiceAccountName, "someRandomProject")
64
+
65
+	checkClusterRoleBindings(d, d, fakeProject)
66
+
67
+	d.assertMessage("AGL0610", "Exp. an error when the expected service accounts don't have cluster-reader access", log.ErrorLevel)
68
+	d.dumpMessages()
69
+}
0 70
new file mode 100644
... ...
@@ -0,0 +1,118 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+
5
+	kapi "k8s.io/kubernetes/pkg/api"
6
+	kapisext "k8s.io/kubernetes/pkg/apis/extensions"
7
+	"k8s.io/kubernetes/pkg/labels"
8
+)
9
+
10
+const daemonSetNoLabeledNodes = `
11
+There are no nodes that match the selector for DaemonSet '%[1]s'. This
12
+means Fluentd is not running and is not gathering logs from any nodes.
13
+An example of a command to target a specific node for this DaemonSet:
14
+
15
+  oc label node/node1.example.com %[2]s
16
+
17
+or to label them all:
18
+
19
+  oc label node --all %[2]s
20
+`
21
+
22
+const daemonSetPartialNodesLabeled = `
23
+Only some of the nodes match the selector for DaemonSet '%s'.
24
+A list of matching nodes can be discovered by running:
25
+
26
+  oc get nodes -l %s
27
+`
28
+const daemonSetNoPodsFound = `
29
+There were no pods found that match DaemonSet '%s' with matchLabels '%s'
30
+`
31
+const daemonSetPodsNotRunning = `
32
+The Pod '%[1]s' matched by DaemonSet '%[2]s' is not in '%[3]s' status: %[4]s. 
33
+
34
+Depending upon the state, this could mean there is an error running the image 
35
+for one or more pod containers, the node could be pulling images, etc.  Try running
36
+the following commands to get additional information:
37
+
38
+  oc describe pod %[1]s -n %[5]s
39
+  oc logs %[1]s -n %[5]s
40
+  oc get events -n %[5]s
41
+`
42
+const daemonSetNotFound = `
43
+There were no DaemonSets in project '%s' that included label '%s'.  This implies
44
+the Fluentd pods are not deployed or the logging stack needs to be upgraded.  Try
45
+running the installer to upgrade the logging stack.
46
+`
47
+
48
+var loggingInfraFluentdSelector = labels.Set{loggingInfraKey: "fluentd"}
49
+
50
+func checkDaemonSets(r diagnosticReporter, adapter daemonsetAdapter, project string) {
51
+	r.Debug("AGL0400", fmt.Sprintf("Checking DaemonSets in project '%s'...", project))
52
+	dsList, err := adapter.daemonsets(project, kapi.ListOptions{LabelSelector: loggingInfraFluentdSelector.AsSelector()})
53
+	if err != nil {
54
+		r.Error("AGL0405", err, fmt.Sprintf("There was an error while trying to retrieve the logging DaemonSets in project '%s' which is most likely transient: %s", project, err))
55
+		return
56
+	}
57
+	if len(dsList.Items) == 0 {
58
+		r.Error("AGL0407", err, fmt.Sprintf(daemonSetNotFound, project, loggingInfraFluentdSelector.AsSelector()))
59
+		return
60
+	}
61
+	nodeList, err := adapter.nodes(kapi.ListOptions{})
62
+	if err != nil {
63
+		r.Error("AGL0410", err, fmt.Sprintf("There was an error while trying to retrieve the list of Nodes which is most likely transient: %s", err))
64
+		return
65
+	}
66
+	for _, ds := range dsList.Items {
67
+		labeled := 0
68
+		nodeSelector := labels.Set(ds.Spec.Template.Spec.NodeSelector).AsSelector()
69
+		r.Debug("AGL0415", fmt.Sprintf("Checking DaemonSet '%s' nodeSelector '%s'", ds.ObjectMeta.Name, nodeSelector))
70
+		for _, node := range nodeList.Items {
71
+			if nodeSelector.Matches(labels.Set(node.Labels)) {
72
+				labeled = labeled + 1
73
+			}
74
+		}
75
+		switch {
76
+		case labeled == 0:
77
+			r.Error("AGL0420", nil, fmt.Sprintf(daemonSetNoLabeledNodes, ds.ObjectMeta.Name, nodeSelector))
78
+			break
79
+		case labeled < len(nodeList.Items):
80
+			r.Warn("AGL0425", nil, fmt.Sprintf(daemonSetPartialNodesLabeled, ds.ObjectMeta.Name, nodeSelector))
81
+			break
82
+		default:
83
+			r.Debug("AGL0430", fmt.Sprintf("DaemonSet '%s' matches all nodes", ds.ObjectMeta.Name))
84
+		}
85
+		if labeled > 0 {
86
+			checkDaemonSetPods(r, adapter, ds, project, labeled)
87
+		}
88
+	}
89
+}
90
+
91
+func checkDaemonSetPods(r diagnosticReporter, adapter daemonsetAdapter, ds kapisext.DaemonSet, project string, numLabeledNodes int) {
92
+	if ds.Spec.Selector == nil {
93
+		r.Debug("AGL0455", "DaemonSet selector is nil. Unable to verify a pod is running")
94
+		return
95
+	}
96
+	podSelector := labels.Set(ds.Spec.Selector.MatchLabels).AsSelector()
97
+	r.Debug("AGL0435", fmt.Sprintf("Checking for running pods for DaemonSet '%s' with matchLabels '%s'", ds.ObjectMeta.Name, podSelector))
98
+	podList, err := adapter.pods(project, kapi.ListOptions{LabelSelector: podSelector})
99
+	if err != nil {
100
+		r.Error("AGL0438", err, fmt.Sprintf("There was an error retrieving pods matched to DaemonSet '%s' that is most likely transient: %s", ds.ObjectMeta.Name, err))
101
+		return
102
+	}
103
+	if len(podList.Items) == 0 {
104
+		r.Error("AGL0440", nil, fmt.Sprintf(daemonSetNoPodsFound, ds.ObjectMeta.Name, podSelector))
105
+		return
106
+	}
107
+	if len(podList.Items) != numLabeledNodes {
108
+		r.Error("AGL0443", nil, fmt.Sprintf("The number of deployed pods %d does not match the number of labeled nodes %d", len(podList.Items), numLabeledNodes))
109
+	}
110
+	for _, pod := range podList.Items {
111
+		if pod.Status.Phase != kapi.PodRunning {
112
+			podName := pod.ObjectMeta.Name
113
+			r.Error("AGL0445", nil, fmt.Sprintf(daemonSetPodsNotRunning, podName, ds.ObjectMeta.Name, kapi.PodRunning, pod.Status.Phase, project))
114
+		}
115
+
116
+	}
117
+}
0 118
new file mode 100644
... ...
@@ -0,0 +1,188 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/api/unversioned"
8
+	kapisext "k8s.io/kubernetes/pkg/apis/extensions"
9
+
10
+	"github.com/openshift/origin/pkg/diagnostics/log"
11
+)
12
+
13
+const (
14
+	testPodsKey  = "pods"
15
+	testNodesKey = "nodes"
16
+	testDsKey    = "daemonsets"
17
+)
18
+
19
+type fakeDaemonSetDiagnostic struct {
20
+	fakeDiagnostic
21
+	fakePods       kapi.PodList
22
+	fakeNodes      kapi.NodeList
23
+	fakeDaemonsets kapisext.DaemonSetList
24
+	clienterrors   map[string]error
25
+}
26
+
27
+func newFakeDaemonSetDiagnostic(t *testing.T) *fakeDaemonSetDiagnostic {
28
+	return &fakeDaemonSetDiagnostic{
29
+		fakeDiagnostic: *newFakeDiagnostic(t),
30
+		clienterrors:   map[string]error{},
31
+	}
32
+}
33
+
34
+func (f *fakeDaemonSetDiagnostic) addDsPodWithPhase(state kapi.PodPhase) {
35
+	pod := kapi.Pod{
36
+		Spec: kapi.PodSpec{},
37
+		Status: kapi.PodStatus{
38
+			Phase: state,
39
+		},
40
+	}
41
+	f.fakePods.Items = append(f.fakePods.Items, pod)
42
+}
43
+
44
+func (f *fakeDaemonSetDiagnostic) addDaemonSetWithSelector(key string, value string) {
45
+	selector := map[string]string{key: value}
46
+	ds := kapisext.DaemonSet{
47
+		Spec: kapisext.DaemonSetSpec{
48
+			Template: kapi.PodTemplateSpec{
49
+				Spec: kapi.PodSpec{
50
+					NodeSelector: selector,
51
+				},
52
+			},
53
+			Selector: &unversioned.LabelSelector{MatchLabels: selector},
54
+		},
55
+	}
56
+	f.fakeDaemonsets.Items = append(f.fakeDaemonsets.Items, ds)
57
+}
58
+
59
+func (f *fakeDaemonSetDiagnostic) addNodeWithLabel(key string, value string) {
60
+	labels := map[string]string{key: value}
61
+	node := kapi.Node{
62
+		ObjectMeta: kapi.ObjectMeta{
63
+			Labels: labels,
64
+		},
65
+	}
66
+	f.fakeNodes.Items = append(f.fakeNodes.Items, node)
67
+}
68
+
69
+func (f *fakeDaemonSetDiagnostic) daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) {
70
+	value, ok := f.clienterrors[testDsKey]
71
+	if ok {
72
+		return nil, value
73
+	}
74
+	return &f.fakeDaemonsets, nil
75
+}
76
+
77
+func (f *fakeDaemonSetDiagnostic) nodes(options kapi.ListOptions) (*kapi.NodeList, error) {
78
+	value, ok := f.clienterrors[testNodesKey]
79
+	if ok {
80
+		return nil, value
81
+	}
82
+	return &f.fakeNodes, nil
83
+}
84
+
85
+func (f *fakeDaemonSetDiagnostic) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
86
+	value, ok := f.clienterrors[testPodsKey]
87
+	if ok {
88
+		return nil, value
89
+	}
90
+	return &f.fakePods, nil
91
+}
92
+
93
+func TestCheckDaemonsetsWhenErrorResponseFromClientRetrievingDaemonsets(t *testing.T) {
94
+	d := newFakeDaemonSetDiagnostic(t)
95
+	d.clienterrors[testDsKey] = errors.New("someerror")
96
+
97
+	checkDaemonSets(d, d, fakeProject)
98
+
99
+	d.assertMessage("AGL0405", "Exp. error when client errors on retrieving DaemonSets", log.ErrorLevel)
100
+}
101
+
102
+func TestCheckDaemonsetsWhenNoDaemonsetsFound(t *testing.T) {
103
+	d := newFakeDaemonSetDiagnostic(t)
104
+
105
+	checkDaemonSets(d, d, fakeProject)
106
+
107
+	d.assertMessage("AGL0407", "Exp. error when client retrieves no DaemonSets", log.ErrorLevel)
108
+}
109
+
110
+func TestCheckDaemonsetsWhenErrorResponseFromClientRetrievingNodes(t *testing.T) {
111
+	d := newFakeDaemonSetDiagnostic(t)
112
+	d.clienterrors[testNodesKey] = errors.New("someerror")
113
+	d.addDaemonSetWithSelector("foo", "bar")
114
+
115
+	checkDaemonSets(d, d, fakeProject)
116
+
117
+	d.assertMessage("AGL0410", "Exp. error when client errors on retrieving Nodes", log.ErrorLevel)
118
+}
119
+
120
+func TestCheckDaemonsetsWhenDaemonsetsMatchNoNodes(t *testing.T) {
121
+	d := newFakeDaemonSetDiagnostic(t)
122
+	d.addDaemonSetWithSelector("foo", "bar")
123
+	d.addNodeWithLabel("foo", "xyz")
124
+
125
+	checkDaemonSets(d, d, fakeProject)
126
+
127
+	d.assertMessage("AGL0420", "Exp. error when daemonsets do not match any nodes", log.ErrorLevel)
128
+}
129
+
130
+func TestCheckDaemonsetsWhenDaemonsetsMatchPartialNodes(t *testing.T) {
131
+	d := newFakeDaemonSetDiagnostic(t)
132
+	d.addDaemonSetWithSelector("foo", "bar")
133
+	d.addNodeWithLabel("foo", "bar")
134
+	d.addNodeWithLabel("foo", "xyz")
135
+
136
+	checkDaemonSets(d, d, fakeProject)
137
+
138
+	d.assertMessage("AGL0425", "Exp. a warning when a daemonset matches fewer than all the nodes", log.WarnLevel)
139
+}
140
+
141
+func TestCheckDaemonsetsWhenClientErrorsFetchingPods(t *testing.T) {
142
+	d := newFakeDaemonSetDiagnostic(t)
143
+	d.clienterrors[testPodsKey] = errors.New("some error")
144
+	d.addDaemonSetWithSelector("foo", "bar")
145
+	d.addNodeWithLabel("foo", "bar")
146
+
147
+	checkDaemonSets(d, d, fakeProject)
148
+
149
+	d.assertMessage("AGL0438", "Exp. error when there is an error retrieving pods for a daemonset", log.ErrorLevel)
150
+
151
+	d.dumpMessages()
152
+}
153
+
154
+func TestCheckDaemonsetsWhenNoPodsMatchDaemonSet(t *testing.T) {
155
+	d := newFakeDaemonSetDiagnostic(t)
156
+	d.addDaemonSetWithSelector("foo", "bar")
157
+	d.addNodeWithLabel("foo", "bar")
158
+
159
+	checkDaemonSets(d, d, fakeProject)
160
+
161
+	d.assertMessage("AGL0440", "Exp. error when there are no pods that match a daemonset", log.ErrorLevel)
162
+	d.dumpMessages()
163
+}
164
+
165
+func TestCheckDaemonsetsWhenNoPodsInRunningState(t *testing.T) {
166
+	d := newFakeDaemonSetDiagnostic(t)
167
+	d.addDaemonSetWithSelector("foo", "bar")
168
+	d.addNodeWithLabel("foo", "bar")
169
+	d.addDsPodWithPhase(kapi.PodPending)
170
+
171
+	checkDaemonSets(d, d, fakeProject)
172
+
173
+	d.assertMessage("AGL0445", "Exp. error when there are no pods in running state", log.ErrorLevel)
174
+	d.dumpMessages()
175
+}
176
+
177
+func TestCheckDaemonsetsWhenAllPodsInRunningState(t *testing.T) {
178
+	d := newFakeDaemonSetDiagnostic(t)
179
+	d.addDaemonSetWithSelector("foo", "bar")
180
+	d.addNodeWithLabel("foo", "bar")
181
+	d.addDsPodWithPhase(kapi.PodRunning)
182
+
183
+	checkDaemonSets(d, d, fakeProject)
184
+
185
+	d.assertNoErrors()
186
+	d.dumpMessages()
187
+}
0 188
new file mode 100644
... ...
@@ -0,0 +1,128 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/labels"
8
+	"k8s.io/kubernetes/pkg/selection"
9
+	"k8s.io/kubernetes/pkg/util/sets"
10
+
11
+	deployapi "github.com/openshift/origin/pkg/deploy/api"
12
+)
13
+
14
+const (
15
+	componentNameEs        = "es"
16
+	componentNameEsOps     = "es-ops"
17
+	componentNameKibana    = "kibana"
18
+	componentNameKibanaOps = "kibana-ops"
19
+	componentNameCurator   = "curator"
20
+)
21
+
22
+// loggingComponents are those 'managed' by rep controllers (e.g. fluentd is deployed with a DaemonSet)
23
+var loggingComponents = sets.NewString(componentNameEs, componentNameEsOps, componentNameKibana, componentNameKibanaOps, componentNameCurator)
24
+
25
+const deploymentConfigWarnMissingForOps = `
26
+Did not find a DeploymentConfig to support component '%s'.  If you require
27
+a separate ElasticSearch cluster to aggregate operations logs, please re-install
28
+or update logging and specify the appropriate switch to enable the ops cluster.
29
+`
30
+
31
+const deploymentConfigZeroPodsFound = `
32
+There were no Pods found that support logging.  Try running
33
+the following commands for additional information:
34
+
35
+  oc describe dc -n %[1]s
36
+  oc get events -n %[1]s
37
+`
38
+const deploymentConfigNoPodsFound = `
39
+There were no Pods found for DeploymentConfig '%[1]s'.  Try running
40
+the following commands for additional information:
41
+
42
+  oc describe dc %[1]s -n %[2]s
43
+  oc get events -n %[2]s
44
+`
45
+const deploymentConfigPodsNotRunning = `
46
+The Pod '%[1]s' matched by DeploymentConfig '%[2]s' is not in '%[3]s' status: %[4]s. 
47
+
48
+Depending upon the state, this could mean there is an error running the image 
49
+for one or more pod containers, the node could be pulling images, etc.  Try running
50
+the following commands for additional information:
51
+
52
+  oc describe pod %[1]s -n %[5]s
53
+  oc logs %[1]s -n %[5]s
54
+  oc get events -n %[5]s
55
+`
56
+
57
+func checkDeploymentConfigs(r diagnosticReporter, adapter deploymentConfigAdapter, project string) {
58
+	req, _ := labels.NewRequirement(loggingInfraKey, selection.Exists, nil)
59
+	selector := labels.NewSelector().Add(*req)
60
+	r.Debug("AGL0040", fmt.Sprintf("Checking for DeploymentConfigs in project '%s' with selector '%s'", project, selector))
61
+	dcList, err := adapter.deploymentconfigs(project, kapi.ListOptions{LabelSelector: selector})
62
+	if err != nil {
63
+		r.Error("AGL0045", err, fmt.Sprintf("There was an error while trying to retrieve the DeploymentConfigs in project '%s': %s", project, err))
64
+		return
65
+	}
66
+	if len(dcList.Items) == 0 {
67
+		r.Error("AGL0047", nil, fmt.Sprintf("Did not find any matching DeploymentConfigs in project '%s' which means no logging components were deployed.  Try running the installer.", project))
68
+		return
69
+	}
70
+	found := sets.NewString()
71
+	for _, entry := range dcList.Items {
72
+		comp := labels.Set(entry.ObjectMeta.Labels).Get(componentKey)
73
+		found.Insert(comp)
74
+		r.Debug("AGL0050", fmt.Sprintf("Found DeploymentConfig '%s' for component '%s'", entry.ObjectMeta.Name, comp))
75
+	}
76
+	for _, entry := range loggingComponents.List() {
77
+		exists := found.Has(entry)
78
+		if !exists {
79
+			if strings.HasSuffix(entry, "-ops") {
80
+				r.Info("AGL0060", fmt.Sprintf(deploymentConfigWarnMissingForOps, entry))
81
+			} else {
82
+				r.Error("AGL0065", nil, fmt.Sprintf("Did not find a DeploymentConfig to support component '%s'", entry))
83
+			}
84
+		}
85
+	}
86
+	checkDeploymentConfigPods(r, adapter, *dcList, project)
87
+}
88
+
89
+func checkDeploymentConfigPods(r diagnosticReporter, adapter deploymentConfigAdapter, dcs deployapi.DeploymentConfigList, project string) {
90
+	compReq, _ := labels.NewRequirement(componentKey, selection.In, loggingComponents)
91
+	provReq, _ := labels.NewRequirement(providerKey, selection.Equals, sets.NewString(openshiftValue))
92
+	podSelector := labels.NewSelector().Add(*compReq, *provReq)
93
+	r.Debug("AGL0070", fmt.Sprintf("Getting pods that match selector '%s'", podSelector))
94
+	podList, err := adapter.pods(project, kapi.ListOptions{LabelSelector: podSelector})
95
+	if err != nil {
96
+		r.Error("AGL0075", err, fmt.Sprintf("There was an error while trying to retrieve the pods for the AggregatedLogging stack: %s", err))
97
+		return
98
+	}
99
+	if len(podList.Items) == 0 {
100
+		r.Error("AGL0080", nil, fmt.Sprintf(deploymentConfigZeroPodsFound, project))
101
+		return
102
+	}
103
+	dcPodCount := make(map[string]int, len(dcs.Items))
104
+	for _, dc := range dcs.Items {
105
+		dcPodCount[dc.ObjectMeta.Name] = 0
106
+	}
107
+
108
+	for _, pod := range podList.Items {
109
+		r.Debug("AGL0082", fmt.Sprintf("Checking status of Pod '%s'...", pod.ObjectMeta.Name))
110
+		dcName, hasDcName := pod.ObjectMeta.Annotations[deployapi.DeploymentConfigAnnotation]
111
+		if !hasDcName {
112
+			r.Warn("AGL0085", nil, fmt.Sprintf("Found Pod '%s' that does not reference a logging deployment config, which may be acceptable. Skipping check to see if it is running.", pod.ObjectMeta.Name))
113
+			continue
114
+		}
115
+		if pod.Status.Phase != kapi.PodRunning {
116
+			podName := pod.ObjectMeta.Name
117
+			r.Error("AGL0090", nil, fmt.Sprintf(deploymentConfigPodsNotRunning, podName, dcName, kapi.PodRunning, pod.Status.Phase, project))
118
+		}
119
+		count := dcPodCount[dcName]
120
+		dcPodCount[dcName] = count + 1
121
+	}
122
+	for name, count := range dcPodCount {
123
+		if count == 0 {
124
+			r.Error("AGL0095", nil, fmt.Sprintf(deploymentConfigNoPodsFound, name, project))
125
+		}
126
+	}
127
+}
0 128
new file mode 100644
... ...
@@ -0,0 +1,153 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+
8
+	deployapi "github.com/openshift/origin/pkg/deploy/api"
9
+	"github.com/openshift/origin/pkg/diagnostics/log"
10
+)
11
+
12
+const (
13
+	testDcPodsKey      = "pods"
14
+	testDcKey          = "deploymentconfigs"
15
+	testSkipAnnotation = "skipAddAnnotation"
16
+)
17
+
18
+type fakeDeploymentConfigsDiagnostic struct {
19
+	fakeDiagnostic
20
+	fakePods     kapi.PodList
21
+	fakeDcs      deployapi.DeploymentConfigList
22
+	clienterrors map[string]error
23
+}
24
+
25
+func newFakeDeploymentConfigsDiagnostic(t *testing.T) *fakeDeploymentConfigsDiagnostic {
26
+	return &fakeDeploymentConfigsDiagnostic{
27
+		fakeDiagnostic: *newFakeDiagnostic(t),
28
+		clienterrors:   map[string]error{},
29
+	}
30
+}
31
+func (f *fakeDeploymentConfigsDiagnostic) addDeployConfigFor(component string) {
32
+	labels := map[string]string{componentKey: component}
33
+	dc := deployapi.DeploymentConfig{
34
+		ObjectMeta: kapi.ObjectMeta{
35
+			Name:   component + "Name",
36
+			Labels: labels,
37
+		},
38
+	}
39
+	f.fakeDcs.Items = append(f.fakeDcs.Items, dc)
40
+}
41
+
42
+func (f *fakeDeploymentConfigsDiagnostic) addPodFor(comp string, state kapi.PodPhase) {
43
+	annotations := map[string]string{}
44
+	if comp != testSkipAnnotation {
45
+		annotations[deployapi.DeploymentConfigAnnotation] = comp
46
+	}
47
+	pod := kapi.Pod{
48
+		ObjectMeta: kapi.ObjectMeta{
49
+			Name:        comp,
50
+			Annotations: annotations,
51
+		},
52
+		Spec: kapi.PodSpec{},
53
+		Status: kapi.PodStatus{
54
+			Phase: state,
55
+		},
56
+	}
57
+	f.fakePods.Items = append(f.fakePods.Items, pod)
58
+}
59
+
60
+func (f *fakeDeploymentConfigsDiagnostic) deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) {
61
+	f.test.Logf(">> calling deploymentconfigs: %s", f.clienterrors)
62
+	value, ok := f.clienterrors[testDcKey]
63
+	if ok {
64
+		f.test.Logf(">> error key found..returning %s", value)
65
+		return nil, value
66
+	}
67
+	f.test.Logf(">> error key not found..")
68
+	return &f.fakeDcs, nil
69
+}
70
+
71
+func (f *fakeDeploymentConfigsDiagnostic) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
72
+	value, ok := f.clienterrors[testDcPodsKey]
73
+	if ok {
74
+		return nil, value
75
+	}
76
+	return &f.fakePods, nil
77
+}
78
+
79
+//test client error listing dcs
80
+func TestCheckDcWhenErrorResponseFromClientRetrievingDc(t *testing.T) {
81
+	d := newFakeDeploymentConfigsDiagnostic(t)
82
+	d.clienterrors[testDcKey] = errors.New("error")
83
+
84
+	checkDeploymentConfigs(d, d, fakeProject)
85
+
86
+	d.assertMessage("AGL0045", "Exp. an error when client returns error retrieving dcs", log.ErrorLevel)
87
+	d.dumpMessages()
88
+}
89
+
90
+func TestCheckDcWhenNoDeployConfigsFound(t *testing.T) {
91
+	d := newFakeDeploymentConfigsDiagnostic(t)
92
+
93
+	checkDeploymentConfigs(d, d, fakeProject)
94
+
95
+	d.assertMessage("AGL0047", "Exp. an error when no DeploymentConfigs are found", log.ErrorLevel)
96
+	d.dumpMessages()
97
+}
98
+
99
+func TestCheckDcWhenOpsOrOtherDeployConfigsMissing(t *testing.T) {
100
+	d := newFakeDeploymentConfigsDiagnostic(t)
101
+	d.addDeployConfigFor(componentNameEs)
102
+
103
+	checkDeploymentConfigs(d, d, fakeProject)
104
+
105
+	d.assertMessage("AGL0060", "Exp. an info message when ops DeploymentConfigs are missing", log.InfoLevel)
106
+	d.assertMessage("AGL0065", "Exp. an error when non-ops DeploymentConfigs are missing", log.ErrorLevel)
107
+	d.dumpMessages()
108
+}
109
+
110
+func TestCheckDcWhenClientErrorListingPods(t *testing.T) {
111
+	d := newFakeDeploymentConfigsDiagnostic(t)
112
+	d.clienterrors[testDcPodsKey] = errors.New("New pod error")
113
+	for _, comp := range loggingComponents.List() {
114
+		d.addDeployConfigFor(comp)
115
+	}
116
+
117
+	checkDeploymentConfigs(d, d, fakeProject)
118
+
119
+	d.assertMessage("AGL0075", "Exp. an error when retrieving pods errors", log.ErrorLevel)
120
+	d.dumpMessages()
121
+}
122
+
123
+func TestCheckDcWhenNoPodsFoundMatchingDeployConfig(t *testing.T) {
124
+	d := newFakeDeploymentConfigsDiagnostic(t)
125
+	for _, comp := range loggingComponents.List() {
126
+		d.addDeployConfigFor(comp)
127
+	}
128
+
129
+	checkDeploymentConfigs(d, d, fakeProject)
130
+
131
+	d.assertMessage("AGL0080", "Exp. an error when no pods are found for the logging DeploymentConfigs", log.ErrorLevel)
132
+	d.dumpMessages()
133
+}
134
+
135
+func TestCheckDcWhenInVariousStates(t *testing.T) {
136
+	d := newFakeDeploymentConfigsDiagnostic(t)
137
+	for _, comp := range loggingComponents.List() {
138
+		d.addDeployConfigFor(comp)
139
+		d.addPodFor(comp, kapi.PodRunning)
140
+	}
141
+	d.addPodFor(testSkipAnnotation, kapi.PodRunning)
142
+	d.addPodFor("someothercomponent", kapi.PodPending)
143
+	d.addDeployConfigFor("somerandom component")
144
+
145
+	checkDeploymentConfigs(d, d, fakeProject)
146
+
147
+	d.assertMessage("AGL0085", "Exp. a warning when pod is missing DeployConfig annotation", log.WarnLevel)
148
+	d.assertMessage("AGL0090", "Exp. an error when pod is not in running state", log.ErrorLevel)
149
+	d.assertMessage("AGL0095", "Exp. an error when pods not found for a DeployConfig", log.ErrorLevel)
150
+
151
+	d.dumpMessages()
152
+}
0 153
new file mode 100644
... ...
@@ -0,0 +1,207 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"fmt"
5
+	"net/url"
6
+
7
+	kapi "k8s.io/kubernetes/pkg/api"
8
+	kapisext "k8s.io/kubernetes/pkg/apis/extensions"
9
+	kclient "k8s.io/kubernetes/pkg/client/unversioned"
10
+	"k8s.io/kubernetes/pkg/labels"
11
+
12
+	authapi "github.com/openshift/origin/pkg/authorization/api"
13
+	"github.com/openshift/origin/pkg/client"
14
+	configapi "github.com/openshift/origin/pkg/cmd/server/api"
15
+	deployapi "github.com/openshift/origin/pkg/deploy/api"
16
+	hostdiag "github.com/openshift/origin/pkg/diagnostics/host"
17
+	"github.com/openshift/origin/pkg/diagnostics/types"
18
+	routesapi "github.com/openshift/origin/pkg/route/api"
19
+)
20
+
21
+// AggregatedLogging is a Diagnostic to check the configurations
22
+// and general integration of the OpenShift stack
23
+// for aggregating container logs
24
+// https://github.com/openshift/origin-aggregated-logging
25
+type AggregatedLogging struct {
26
+	masterConfig     *configapi.MasterConfig
27
+	MasterConfigFile string
28
+	OsClient         *client.Client
29
+	KubeClient       *kclient.Client
30
+	result           types.DiagnosticResult
31
+}
32
+
33
+const (
34
+	AggregatedLoggingName = "AggregatedLogging"
35
+
36
+	loggingInfraKey = "logging-infra"
37
+	componentKey    = "component"
38
+	providerKey     = "provider"
39
+	openshiftValue  = "openshift"
40
+
41
+	fluentdServiceAccountName = "aggregated-logging-fluentd"
42
+)
43
+
44
+var loggingSelector = labels.Set{loggingInfraKey: "support"}
45
+
46
+//NewAggregatedLogging returns the AggregatedLogging Diagnostic
47
+func NewAggregatedLogging(masterConfigFile string, kclient *kclient.Client, osclient *client.Client) *AggregatedLogging {
48
+	return &AggregatedLogging{nil, masterConfigFile, osclient, kclient, types.NewDiagnosticResult(AggregatedLoggingName)}
49
+}
50
+
51
+func (d *AggregatedLogging) getScc(name string) (*kapi.SecurityContextConstraints, error) {
52
+	return d.KubeClient.SecurityContextConstraints().Get(name)
53
+}
54
+
55
+func (d *AggregatedLogging) getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) {
56
+	return d.OsClient.ClusterRoleBindings().Get(name)
57
+}
58
+
59
+func (d *AggregatedLogging) routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) {
60
+	return d.OsClient.Routes(project).List(options)
61
+}
62
+
63
+func (d *AggregatedLogging) serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) {
64
+	return d.KubeClient.ServiceAccounts(project).List(options)
65
+}
66
+
67
+func (d *AggregatedLogging) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
68
+	return d.KubeClient.Services(project).List(options)
69
+}
70
+
71
+func (d *AggregatedLogging) endpointsForService(project string, service string) (*kapi.Endpoints, error) {
72
+	return d.KubeClient.Endpoints(project).Get(service)
73
+}
74
+
75
+func (d *AggregatedLogging) daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) {
76
+	return d.KubeClient.DaemonSets(project).List(options)
77
+}
78
+
79
+func (d *AggregatedLogging) nodes(options kapi.ListOptions) (*kapi.NodeList, error) {
80
+	return d.KubeClient.Nodes().List(options)
81
+}
82
+
83
+func (d *AggregatedLogging) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
84
+	return d.KubeClient.Pods(project).List(options)
85
+}
86
+func (d *AggregatedLogging) deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) {
87
+	return d.OsClient.DeploymentConfigs(project).List(options)
88
+}
89
+
90
+func (d *AggregatedLogging) Info(id string, message string) {
91
+	d.result.Info(id, message)
92
+}
93
+
94
+func (d *AggregatedLogging) Error(id string, err error, message string) {
95
+	d.result.Error(id, err, message)
96
+}
97
+
98
+func (d *AggregatedLogging) Debug(id string, message string) {
99
+	d.result.Debug(id, message)
100
+}
101
+
102
+func (d *AggregatedLogging) Warn(id string, err error, message string) {
103
+	d.result.Warn(id, err, message)
104
+}
105
+
106
+func (d *AggregatedLogging) Name() string {
107
+	return AggregatedLoggingName
108
+}
109
+
110
+func (d *AggregatedLogging) Description() string {
111
+	return "Check aggregated logging integration for proper configuration"
112
+}
113
+
114
+func (d *AggregatedLogging) CanRun() (bool, error) {
115
+	if len(d.MasterConfigFile) == 0 {
116
+		return false, errors.New("No master config file was provided")
117
+	}
118
+	if d.OsClient == nil {
119
+		return false, errors.New("Config must include a cluster-admin context to run this diagnostic")
120
+	}
121
+	if d.KubeClient == nil {
122
+		return false, errors.New("Config must include a cluster-admin context to run this diagnostic")
123
+	}
124
+	var err error
125
+	d.masterConfig, err = hostdiag.GetMasterConfig(d.result, d.MasterConfigFile)
126
+	if err != nil {
127
+		return false, errors.New("Unreadable master config; skipping this diagnostic.")
128
+	}
129
+	return true, nil
130
+}
131
+
132
+func (d *AggregatedLogging) Check() types.DiagnosticResult {
133
+	project := retrieveLoggingProject(d.result, d.masterConfig, d.OsClient)
134
+	if len(project) != 0 {
135
+		checkServiceAccounts(d, d, project)
136
+		checkClusterRoleBindings(d, d, project)
137
+		checkSccs(d, d, project)
138
+		checkDeploymentConfigs(d, d, project)
139
+		checkDaemonSets(d, d, project)
140
+		checkServices(d, d, project)
141
+		checkRoutes(d, d, project)
142
+		checkKibana(d.result, d.OsClient, d.KubeClient, project)
143
+	}
144
+	return d.result
145
+}
146
+
147
+const projectNodeSelectorWarning = `
148
+The project '%[1]s' was found with a non-empty node selector annotation.  This will keep
149
+Fluentd from running on certain nodes and collecting logs from the entire cluster.  You
150
+can correct it by editing the project:
151
+
152
+  oc edit namespace %[1]s
153
+
154
+and updating the annotation:
155
+
156
+  'openshift.io/node-selector' : ""
157
+
158
+`
159
+
160
+func retrieveLoggingProject(r types.DiagnosticResult, masterCfg *configapi.MasterConfig, osClient *client.Client) string {
161
+	r.Debug("AGL0010", fmt.Sprintf("masterConfig.AssetConfig.LoggingPublicURL: '%s'", masterCfg.AssetConfig.LoggingPublicURL))
162
+	projectName := ""
163
+	if len(masterCfg.AssetConfig.LoggingPublicURL) == 0 {
164
+		r.Debug("AGL0017", "masterConfig.AssetConfig.LoggingPublicURL is empty")
165
+		return projectName
166
+	}
167
+
168
+	loggingUrl, err := url.Parse(masterCfg.AssetConfig.LoggingPublicURL)
169
+	if err != nil {
170
+		r.Error("AGL0011", err, fmt.Sprintf("Unable to parse the loggingPublicURL from the masterConfig '%s'", masterCfg.AssetConfig.LoggingPublicURL))
171
+		return projectName
172
+	}
173
+
174
+	routeList, err := osClient.Routes(kapi.NamespaceAll).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
175
+	if err != nil {
176
+		r.Error("AGL0012", err, fmt.Sprintf("There was an error while trying to find the route associated with '%s' which is probably transient: %s", loggingUrl, err))
177
+		return projectName
178
+	}
179
+
180
+	for _, route := range routeList.Items {
181
+		r.Debug("AGL0013", fmt.Sprintf("Comparing URL to route.Spec.Host: %s", route.Spec.Host))
182
+		if loggingUrl.Host == route.Spec.Host {
183
+			if len(projectName) == 0 {
184
+				projectName = route.ObjectMeta.Namespace
185
+				r.Info("AGL0015", fmt.Sprintf("Found route '%s' matching logging URL '%s' in project: '%s'", route.ObjectMeta.Name, loggingUrl.Host, projectName))
186
+			} else {
187
+				r.Warn("AGL0019", nil, fmt.Sprintf("Found additional route '%s' matching logging URL '%s' in project: '%s'.  This could mean you have multiple logging deployments.", route.ObjectMeta.Name, loggingUrl.Host, route.ObjectMeta.Namespace))
188
+			}
189
+		}
190
+	}
191
+	if len(projectName) == 0 {
192
+		message := fmt.Sprintf("Unable to find a route matching the loggingPublicURL defined in the master config '%s'. Check that the URL is correct and aggregated logging is deployed.", loggingUrl)
193
+		r.Error("AGL0014", errors.New(message), message)
194
+		return ""
195
+	}
196
+	project, err := osClient.Projects().Get(projectName)
197
+	if err != nil {
198
+		r.Error("AGL0018", err, fmt.Sprintf("There was an error retrieving project '%s' which is most likely a transient error: %s", projectName, err))
199
+		return ""
200
+	}
201
+	nodeSelector, ok := project.ObjectMeta.Annotations["openshift.io/node-selector"]
202
+	if ok && len(nodeSelector) != 0 {
203
+		r.Warn("AGL0030", nil, fmt.Sprintf(projectNodeSelectorWarning, projectName))
204
+	}
205
+	return projectName
206
+}
0 207
new file mode 100644
... ...
@@ -0,0 +1,78 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"github.com/openshift/origin/pkg/diagnostics/log"
4
+	"testing"
5
+)
6
+
7
+const fakeProject = "someProject"
8
+
9
+type fakeLogMessage struct {
10
+	id       string
11
+	logLevel log.Level
12
+	message  string
13
+}
14
+
15
+type fakeDiagnostic struct {
16
+	err      error
17
+	messages map[string]fakeLogMessage
18
+	test     *testing.T
19
+}
20
+
21
+func newFakeDiagnostic(t *testing.T) *fakeDiagnostic {
22
+	return &fakeDiagnostic{
23
+		messages: map[string]fakeLogMessage{},
24
+		test:     t,
25
+	}
26
+}
27
+
28
+func (f *fakeDiagnostic) dumpMessages() {
29
+	f.test.Log("<<<<<<<< Dumping test messages >>>>>>>>")
30
+	for id, message := range f.messages {
31
+		f.test.Logf("id: %s, logLevel: %s, message: %s", id, message.logLevel.Name, message.message)
32
+	}
33
+}
34
+
35
+func (f *fakeDiagnostic) Info(id string, message string) {
36
+	f.messages[id] = fakeLogMessage{id, log.InfoLevel, message}
37
+}
38
+
39
+func (f *fakeDiagnostic) Error(id string, err error, message string) {
40
+	f.messages[id] = fakeLogMessage{id, log.ErrorLevel, message}
41
+}
42
+
43
+func (f *fakeDiagnostic) Debug(id string, message string) {
44
+	f.messages[id] = fakeLogMessage{id, log.DebugLevel, message}
45
+}
46
+
47
+func (f *fakeDiagnostic) Warn(id string, err error, message string) {
48
+	f.messages[id] = fakeLogMessage{id, log.WarnLevel, message}
49
+}
50
+
51
+func (d *fakeDiagnostic) assertMessage(id string, missing string, level log.Level) {
52
+	message, ok := d.messages[id]
53
+	if !ok {
54
+		d.test.Errorf("Unable to find message with id %s. %s", id, missing)
55
+		return
56
+	}
57
+	if message.logLevel != level {
58
+		d.test.Errorf("Exp logLevel %s for %s but got %s", level.Name, id, message.logLevel.Name)
59
+	}
60
+}
61
+
62
+func (d *fakeDiagnostic) assertNoWarnings() {
63
+	for _, message := range d.messages {
64
+
65
+		if message.logLevel == log.WarnLevel {
66
+			d.test.Errorf("Exp no WarnLevel log messages.")
67
+		}
68
+	}
69
+}
70
+func (d *fakeDiagnostic) assertNoErrors() {
71
+	for _, message := range d.messages {
72
+
73
+		if message.logLevel == log.ErrorLevel {
74
+			d.test.Errorf("Exp no ErrorLevel log messages.")
75
+		}
76
+	}
77
+}
0 78
new file mode 100644
... ...
@@ -0,0 +1,60 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	kapi "k8s.io/kubernetes/pkg/api"
4
+	kapisext "k8s.io/kubernetes/pkg/apis/extensions"
5
+
6
+	authapi "github.com/openshift/origin/pkg/authorization/api"
7
+	deployapi "github.com/openshift/origin/pkg/deploy/api"
8
+	routesapi "github.com/openshift/origin/pkg/route/api"
9
+)
10
+
11
+//diagnosticReporter provides diagnostic messages
12
+type diagnosticReporter interface {
13
+	Info(id string, message string)
14
+	Debug(id string, message string)
15
+	Error(id string, err error, message string)
16
+	Warn(id string, err error, message string)
17
+}
18
+
19
+type routesAdapter interface {
20
+	routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error)
21
+}
22
+
23
+type sccAdapter interface {
24
+	getScc(name string) (*kapi.SecurityContextConstraints, error)
25
+}
26
+
27
+type clusterRoleBindingsAdapter interface {
28
+	getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error)
29
+}
30
+
31
+//deploymentConfigAdapter is an abstraction to retrieve resources for validating dcs
32
+//for aggregated logging diagnostics
33
+type deploymentConfigAdapter interface {
34
+	deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error)
35
+	podsAdapter
36
+}
37
+
38
+//daemonsetAdapter is an abstraction to retrieve resources for validating daemonsets
39
+//for aggregated logging diagnostics
40
+type daemonsetAdapter interface {
41
+	daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error)
42
+	nodes(options kapi.ListOptions) (*kapi.NodeList, error)
43
+	podsAdapter
44
+}
45
+
46
+type podsAdapter interface {
47
+	pods(project string, options kapi.ListOptions) (*kapi.PodList, error)
48
+}
49
+
50
+//saAdapter abstracts retrieving service accounts
51
+type saAdapter interface {
52
+	serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error)
53
+}
54
+
55
+//servicesAdapter abstracts retrieving services
56
+type servicesAdapter interface {
57
+	services(project string, options kapi.ListOptions) (*kapi.ServiceList, error)
58
+	endpointsForService(project string, serviceName string) (*kapi.Endpoints, error)
59
+}
0 60
new file mode 100644
... ...
@@ -0,0 +1,98 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"fmt"
5
+	"net/url"
6
+	"strings"
7
+
8
+	kapi "k8s.io/kubernetes/pkg/api"
9
+	kclient "k8s.io/kubernetes/pkg/client/unversioned"
10
+	"k8s.io/kubernetes/pkg/util/sets"
11
+
12
+	"github.com/openshift/origin/pkg/client"
13
+	"github.com/openshift/origin/pkg/diagnostics/types"
14
+	oauthapi "github.com/openshift/origin/pkg/oauth/api"
15
+)
16
+
17
+const (
18
+	kibanaProxyOauthClientName = "kibana-proxy"
19
+	kibanaProxySecretName      = "logging-kibana-proxy"
20
+	oauthSecretKeyName         = "oauth-secret"
21
+)
22
+
23
+//checkKibana verifies the various integration points between Kibana and logging
24
+func checkKibana(r types.DiagnosticResult, osClient *client.Client, kClient *kclient.Client, project string) {
25
+	oauthclient, err := osClient.OAuthClients().Get(kibanaProxyOauthClientName)
26
+	if err != nil {
27
+		r.Error("AGL0115", err, fmt.Sprintf("Error retrieving the OauthClient '%s'. Unable to check Kibana", kibanaProxyOauthClientName))
28
+		return
29
+	}
30
+	checkKibanaSecret(r, osClient, kClient, project, oauthclient)
31
+	checkKibanaRoutesInOauthClient(r, osClient, project, oauthclient)
32
+}
33
+
34
+//checkKibanaSecret confirms the secret used by kibana matches that configured in the oauth client
35
+func checkKibanaSecret(r types.DiagnosticResult, osClient *client.Client, kClient *kclient.Client, project string, oauthclient *oauthapi.OAuthClient) {
36
+	r.Debug("AGL0100", "Checking oauthclient secrets...")
37
+	secret, err := kClient.Secrets(project).Get(kibanaProxySecretName)
38
+	if err != nil {
39
+		r.Error("AGL0105", err, fmt.Sprintf("Error retrieving the secret '%s'", kibanaProxySecretName))
40
+		return
41
+	}
42
+	decoded, err := decodeSecret(secret, oauthSecretKeyName)
43
+	if err != nil {
44
+		r.Error("AGL0110", err, "Unable to decode Kibana Secret")
45
+		return
46
+	}
47
+	if decoded != oauthclient.Secret {
48
+		r.Debug("AGL0120", fmt.Sprintf("OauthClient Secret:    '%s'", oauthclient.Secret))
49
+		r.Debug("AGL0125", fmt.Sprintf("Decoded Kibana Secret: '%s'", decoded))
50
+		message := fmt.Sprintf("The %s OauthClient.Secret does not match the decoded oauth secret in '%s'", kibanaProxyOauthClientName, kibanaProxySecretName)
51
+		r.Error("AGL0130", errors.New(message), message)
52
+	}
53
+}
54
+
55
+//checkKibanaRoutesInOauthClient verifies the client contains the correct redirect uris
56
+func checkKibanaRoutesInOauthClient(r types.DiagnosticResult, osClient *client.Client, project string, oauthclient *oauthapi.OAuthClient) {
57
+	r.Debug("AGL0141", "Checking oauthclient redirectURIs for the logging routes...")
58
+	routeList, err := osClient.Routes(project).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
59
+	if err != nil {
60
+		r.Error("AGL0143", err, "Error retrieving the logging routes.")
61
+		return
62
+	}
63
+	redirectUris, err := parseRedirectUris(oauthclient.RedirectURIs)
64
+	if err != nil {
65
+		r.Error("AGL0145", err, "Error parsing the OAuthClient.RedirectURIs")
66
+		return
67
+	}
68
+	for _, route := range routeList.Items {
69
+		if !redirectUris.Has(route.Spec.Host) {
70
+			message := fmt.Sprintf("OauthClient '%s' does not include a redirectURI for route '%s' which is '%s'", oauthclient.ObjectMeta.Name, route.ObjectMeta.Name, route.Spec.Host)
71
+			r.Error("AGL0147", errors.New(message), message)
72
+		}
73
+	}
74
+
75
+	return
76
+}
77
+
78
+func parseRedirectUris(uris []string) (sets.String, error) {
79
+	urls := sets.String{}
80
+	for _, uri := range uris {
81
+		url, err := url.Parse(uri)
82
+		if err != nil {
83
+			return urls, err
84
+		}
85
+		urls.Insert(url.Host)
86
+	}
87
+	return urls, nil
88
+}
89
+
90
+// decodeSecret returns the value of a secret data entry as a trimmed string (the client has already base64-decoded Secret data)
91
+func decodeSecret(secret *kapi.Secret, key string) (string, error) {
92
+	value, ok := secret.Data[key]
93
+	if !ok {
94
+		return "", fmt.Errorf("The %s secret did not have a data entry for %s", secret.ObjectMeta.Name, key)
95
+	}
96
+	return strings.TrimSpace(string(value)), nil
97
+}
0 98
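A hypothetical test sketch (not part of this change) illustrating decodeSecret: Secret data arrives from the client as raw bytes, so the helper only trims whitespace and reports a missing key as an error.

  package aggregated_logging

  import (
      "testing"

      kapi "k8s.io/kubernetes/pkg/api"
  )

  func TestDecodeSecretSketch(t *testing.T) {
      secret := &kapi.Secret{
          ObjectMeta: kapi.ObjectMeta{Name: kibanaProxySecretName},
          Data:       map[string][]byte{oauthSecretKeyName: []byte("s3cr3t\n")},
      }
      if value, err := decodeSecret(secret, oauthSecretKeyName); err != nil || value != "s3cr3t" {
          t.Errorf("expected the trimmed value 's3cr3t', got %q (err: %v)", value, err)
      }
      if _, err := decodeSecret(secret, "missing-key"); err == nil {
          t.Error("expected an error for a data entry that does not exist")
      }
  }
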
new file mode 100644
... ...
@@ -0,0 +1,95 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"crypto/tls"
4
+	"crypto/x509"
5
+	"encoding/pem"
6
+	"errors"
7
+	"fmt"
8
+
9
+	kapi "k8s.io/kubernetes/pkg/api"
10
+
11
+	routes "github.com/openshift/origin/pkg/route/api"
12
+)
13
+
14
+const routeUnaccepted = `
15
+An unaccepted route is most likely due to one of the following reasons:
16
+
17
+* No router has been deployed to serve the route.
18
+* Another route with the same host already exists.
19
+
20
+If a router has been deployed, look for duplicate matching routes by
21
+running the following:
22
+
23
+  oc get --all-namespaces routes --template='{{range .items}}{{if eq .spec.host "%[2]s"}}{{println .metadata.name "in" .metadata.namespace}}{{end}}{{end}}'
24
+
25
+`
26
+const routeCertMissingHostName = `
27
+Try updating the route certificate to include its host as either the CommonName (CN) or one of the alternate names.
28
+`
29
+
30
+//checkRoutes looks through the logging infra routes to see if they have been accepted and, when TLS is configured, that the certificate and key are valid
31
+func checkRoutes(r diagnosticReporter, adapter routesAdapter, project string) {
32
+	r.Debug("AGL0300", "Checking routes...")
33
+	routeList, err := adapter.routes(project, kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
34
+	if err != nil {
35
+		r.Error("AGL0305", err, fmt.Sprintf("There was an error retrieving routes in the project '%s' with selector '%s'", project, loggingSelector.AsSelector()))
36
+		return
37
+	}
38
+	if len(routeList.Items) == 0 {
39
+		r.Error("AGL0310", nil, fmt.Sprintf("There were no routes found to support logging in project '%s'", project))
40
+		return
41
+	}
42
+	for _, route := range routeList.Items {
43
+		if !wasAccepted(r, route) {
44
+			r.Error("AGL0325", nil, fmt.Sprintf("Route '%s' has not been accepted by any routers."+routeUnaccepted, route.ObjectMeta.Name, route.Spec.Host))
45
+		}
46
+		if route.Spec.TLS != nil && len(route.Spec.TLS.Certificate) != 0 && len(route.Spec.TLS.Key) != 0 {
47
+			checkRouteCertificate(r, route)
48
+		} else {
49
+			r.Debug("AGL0331", fmt.Sprintf("Skipping key and certificate checks on route '%s'.  TLS is not configured or the certificate or key is missing.", route.ObjectMeta.Name))
50
+		}
51
+	}
52
+}
53
+
54
+func checkRouteCertificate(r diagnosticReporter, route routes.Route) {
55
+	r.Debug("AGL0330", fmt.Sprintf("Checking certificate for route '%s'...", route.ObjectMeta.Name))
56
+	block, _ := pem.Decode([]byte(route.Spec.TLS.Certificate))
57
+	//verify hostname
58
+	if block != nil {
59
+		cert, err := x509.ParseCertificate(block.Bytes)
60
+		if err != nil {
61
+			r.Error("AGL0335", err, fmt.Sprintf("Unable to parse the certificate for route '%s'", route.ObjectMeta.Name))
62
+			return
63
+		}
64
+		r.Debug("AGL0340", fmt.Sprintf("Cert CommonName: '%s' Cert DNSNames: '%s'", cert.Subject.CommonName, cert.DNSNames))
65
+		if err := cert.VerifyHostname(route.Spec.Host); err != nil {
66
+			r.Error("AGL0345", err, fmt.Sprintf("Route '%[1]s' certificate does not include route host '%[2]s'"+routeCertMissingHostName, route.ObjectMeta.Name, route.Spec.Host))
67
+		}
68
+	} else {
69
+		r.Error("AGL0350", errors.New("Unable to decode the TLS Certificate"), "Unable to decode the TLS Certificate")
70
+	}
71
+
72
+	//verify key matches cert
73
+	r.Debug("AGL0355", fmt.Sprintf("Checking certificate matches key for route '%s'", route.ObjectMeta.Name))
74
+	_, err := tls.X509KeyPair([]byte(route.Spec.TLS.Certificate), []byte(route.Spec.TLS.Key))
75
+	if err != nil {
76
+		r.Error("AGL0365", err, fmt.Sprintf("Route '%s' key and certificate do not match: %s.  The router will be unable to pass traffic using this route.", route.ObjectMeta.Name, err))
77
+	}
78
+}
79
+
80
+func wasAccepted(r diagnosticReporter, route routes.Route) bool {
81
+	r.Debug("AGL0310", fmt.Sprintf("Checking if route '%s' was accepted...", route.ObjectMeta.Name))
82
+	accepted := 0
83
+	for _, status := range route.Status.Ingress {
84
+		r.Debug("AGL0315", fmt.Sprintf("Status for router: '%s', host: '%s'", status.RouterName, status.Host))
85
+		for _, condition := range status.Conditions {
86
+			r.Debug("AGL0320", fmt.Sprintf("Condition type: '%s' status: '%s'", condition.Type, condition.Status))
87
+			if condition.Type == routes.RouteAdmitted && condition.Status == kapi.ConditionTrue {
88
+				accepted = accepted + 1
89
+			}
90
+		}
91
+	}
92
+	//Add check to compare acceptance to the number of available routers?
93
+	return accepted > 0
94
+}
0 95
new file mode 100644
... ...
@@ -0,0 +1,115 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+
8
+	"github.com/openshift/origin/pkg/diagnostics/log"
9
+	routesapi "github.com/openshift/origin/pkg/route/api"
10
+)
11
+
12
+const (
13
+	testRoutesKey = "routes"
14
+)
15
+
16
+type fakeRoutesDiagnostic struct {
17
+	fakeDiagnostic
18
+	fakeRoutes   routesapi.RouteList
19
+	clienterrors map[string]error
20
+}
21
+
22
+func newFakeRoutesDiagnostic(t *testing.T) *fakeRoutesDiagnostic {
23
+	return &fakeRoutesDiagnostic{
24
+		fakeDiagnostic: *newFakeDiagnostic(t),
25
+		clienterrors:   map[string]error{},
26
+	}
27
+}
28
+
29
+func (f *fakeRoutesDiagnostic) addRouteWith(condType routesapi.RouteIngressConditionType, condStatus kapi.ConditionStatus, cert string, key string) {
30
+	ingress := routesapi.RouteIngress{
31
+		Conditions: []routesapi.RouteIngressCondition{
32
+			{
33
+				Type:   condType,
34
+				Status: condStatus,
35
+			},
36
+		},
37
+	}
38
+	route := routesapi.Route{
39
+		ObjectMeta: kapi.ObjectMeta{Name: "aname"},
40
+		Status: routesapi.RouteStatus{
41
+			Ingress: []routesapi.RouteIngress{ingress},
42
+		},
43
+	}
44
+	if len(cert) != 0 && len(key) != 0 {
45
+		tls := routesapi.TLSConfig{
46
+			Certificate: cert,
47
+			Key:         key,
48
+		}
49
+		route.Spec.TLS = &tls
50
+	}
51
+	f.fakeRoutes.Items = append(f.fakeRoutes.Items, route)
52
+}
53
+
54
+func (f *fakeRoutesDiagnostic) routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) {
55
+	value, ok := f.clienterrors[testRoutesKey]
56
+	if ok {
57
+		return nil, value
58
+	}
59
+	return &f.fakeRoutes, nil
60
+}
61
+
62
+func TestRouteWhenErrorFromClient(t *testing.T) {
63
+	d := newFakeRoutesDiagnostic(t)
64
+	d.clienterrors[testRoutesKey] = errors.New("some client error")
65
+
66
+	checkRoutes(d, d, fakeProject)
67
+	d.assertMessage("AGL0305", "Exp an error when there is a client error retrieving routes", log.ErrorLevel)
68
+	d.dumpMessages()
69
+}
70
+
71
+func TestRouteWhenZeroRoutesAvailable(t *testing.T) {
72
+	d := newFakeRoutesDiagnostic(t)
73
+
74
+	checkRoutes(d, d, fakeProject)
75
+	d.assertMessage("AGL0310", "Exp an error when there are no routes to support logging", log.ErrorLevel)
76
+	d.dumpMessages()
77
+}
78
+
79
+// test that an error is reported when the route is not accepted
80
+func TestRouteWhenRouteNotAccepted(t *testing.T) {
81
+	d := newFakeRoutesDiagnostic(t)
82
+	d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "", "")
83
+
84
+	checkRoutes(d, d, fakeProject)
85
+	d.assertMessage("AGL0325", "Exp an error when a route was not accepted", log.ErrorLevel)
86
+	d.assertMessage("AGL0331", "Exp to skip the cert check since none specified", log.DebugLevel)
87
+	d.dumpMessages()
88
+}
89
+func TestRouteWhenRouteAccepted(t *testing.T) {
90
+	d := newFakeRoutesDiagnostic(t)
91
+	d.addRouteWith(routesapi.RouteAdmitted, kapi.ConditionTrue, "", "")
92
+
93
+	checkRoutes(d, d, fakeProject)
94
+	d.assertNoErrors()
95
+	d.dumpMessages()
96
+}
97
+
98
+func TestRouteWhenErrorDecodingCert(t *testing.T) {
99
+	d := newFakeRoutesDiagnostic(t)
100
+	d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "cert", "key")
101
+
102
+	checkRoutes(d, d, fakeProject)
103
+	d.assertMessage("AGL0350", "Exp an error when unable to decode cert", log.ErrorLevel)
104
+	d.dumpMessages()
105
+}
106
+
107
+func TestRouteWhenErrorParsingCert(t *testing.T) {
108
+	d := newFakeRoutesDiagnostic(t)
109
+	d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "-----BEGIN CERTIFICATE-----\naGVsbG8gd29ybGQK\n-----END CERTIFICATE-----\n", "key")
110
+
111
+	checkRoutes(d, d, fakeProject)
112
+	d.assertMessage("AGL0335", "Exp an error when unable to parse the cert", log.ErrorLevel)
113
+	d.dumpMessages()
114
+}
0 115
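
The parse-error test above relies on input that pem.Decode accepts but x509.ParseCertificate rejects; any well-formed PEM block whose body is not DER-encoded certificate data behaves this way. If the tests prefer a named fixture over an inline literal, a sketch (the base64 payload is arbitrary filler):

    // notACertPEM decodes cleanly as a PEM block but is not a DER-encoded
    // certificate, so checkRouteCertificate reports AGL0335 (parse failure)
    // rather than AGL0350 (decode failure).
    const notACertPEM = `-----BEGIN CERTIFICATE-----
    aGVsbG8gd29ybGQK
    -----END CERTIFICATE-----
    `

    // Possible usage in TestRouteWhenErrorParsingCert:
    //   d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, notACertPEM, "key")
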
new file mode 100644
... ...
@@ -0,0 +1,37 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+
5
+	"k8s.io/kubernetes/pkg/util/sets"
6
+)
7
+
8
+const sccPrivilegedName = "privileged"
9
+
10
+var sccPrivilegedNames = sets.NewString(fluentdServiceAccountName)
11
+
12
+const sccPrivilegedUnboundServiceAccount = `
13
+The ServiceAccount '%[1]s' in project '%[2]s' is not bound to the 'privileged' SecurityContextConstraint.  As a
14
+user with a cluster-admin role, you can grant the permissions by running
15
+the following:
16
+
17
+  oadm policy add-scc-to-user privileged system:serviceaccount:%[2]s:%[1]s
18
+`
19
+
20
+func checkSccs(r diagnosticReporter, adapter sccAdapter, project string) {
21
+	r.Debug("AGL0700", "Checking SecurityContextConstraints...")
22
+	scc, err := adapter.getScc(sccPrivilegedName)
23
+	if err != nil {
24
+		r.Error("AGL0705", err, fmt.Sprintf("There was an error while trying to retrieve the SecurityContextConstraints for the logging stack: %s", err))
25
+		return
26
+	}
27
+	privilegedUsers := sets.NewString()
28
+	for _, user := range scc.Users {
29
+		privilegedUsers.Insert(user)
30
+	}
31
+	for _, name := range sccPrivilegedNames.List() {
32
+		if !privilegedUsers.Has(fmt.Sprintf("system:serviceaccount:%s:%s", project, name)) {
33
+			r.Error("AGL0710", nil, fmt.Sprintf(sccPrivilegedUnboundServiceAccount, name, project))
34
+		}
35
+	}
36
+}
0 37
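
The SCC check above boils down to set membership on subject strings of the form system:serviceaccount:<project>:<name>. A standalone sketch of that comparison using plain maps rather than the Kubernetes sets helper; the project and account names in main are illustrative placeholders only:

    package main

    import "fmt"

    // missingPrivilegedSubjects restates the comparison done by checkSccs:
    // build the expected system:serviceaccount:<project>:<name> subjects and
    // report the ones absent from the SCC's Users list.
    func missingPrivilegedSubjects(sccUsers []string, project string, names []string) []string {
    	present := map[string]bool{}
    	for _, user := range sccUsers {
    		present[user] = true
    	}
    	var missing []string
    	for _, name := range names {
    		subject := fmt.Sprintf("system:serviceaccount:%s:%s", project, name)
    		if !present[subject] {
    			missing = append(missing, subject)
    		}
    	}
    	return missing
    }

    func main() {
    	// Placeholder project and service-account names for illustration.
    	users := []string{"system:serviceaccount:logging:aggregated-logging-fluentd"}
    	fmt.Println(missingPrivilegedSubjects(users, "logging", []string{"aggregated-logging-fluentd", "logging-deployer"}))
    }
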
new file mode 100644
... ...
@@ -0,0 +1,65 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"encoding/json"
4
+	"errors"
5
+	"fmt"
6
+	"testing"
7
+
8
+	kapi "k8s.io/kubernetes/pkg/api"
9
+
10
+	"github.com/openshift/origin/pkg/diagnostics/log"
11
+)
12
+
13
+type fakeSccDiagnostic struct {
14
+	fakeDiagnostic
15
+	fakeScc kapi.SecurityContextConstraints
16
+}
17
+
18
+func newFakeSccDiagnostic(t *testing.T) *fakeSccDiagnostic {
19
+	return &fakeSccDiagnostic{
20
+		fakeDiagnostic: *newFakeDiagnostic(t),
21
+	}
22
+}
23
+
24
+func (f *fakeSccDiagnostic) getScc(name string) (*kapi.SecurityContextConstraints, error) {
25
+	data, _ := json.Marshal(f.fakeScc)
26
+	f.test.Logf(">> test#getScc(%s), err: %s, scc: %s", name, f.err, string(data))
27
+	if f.err != nil {
28
+		return nil, f.err
29
+	}
30
+	return &f.fakeScc, nil
31
+}
32
+
33
+func (f *fakeSccDiagnostic) addSccFor(name string, project string) {
34
+	f.fakeScc.Users = append(f.fakeScc.Users, fmt.Sprintf("system:serviceaccount:%s:%s", project, name))
35
+}
36
+
37
+func TestCheckSccWhenClientReturnsError(t *testing.T) {
38
+	d := newFakeSccDiagnostic(t)
39
+	d.err = errors.New("client error")
40
+
41
+	checkSccs(d, d, fakeProject)
42
+
43
+	d.assertMessage("AGL0705", "Exp error when client returns error getting SCC", log.ErrorLevel)
44
+	d.dumpMessages()
45
+}
46
+
47
+func TestCheckSccWhenMissingPrivilegedUsers(t *testing.T) {
48
+	d := newFakeSccDiagnostic(t)
49
+
50
+	checkSccs(d, d, fakeProject)
51
+
52
+	d.assertMessage("AGL0710", "Exp an error when the SCC is missing an expected service account", log.ErrorLevel)
53
+	d.dumpMessages()
54
+}
55
+
56
+func TestCheckSccWhenEverythingExists(t *testing.T) {
57
+	d := newFakeSccDiagnostic(t)
58
+	d.addSccFor(fluentdServiceAccountName, fakeProject)
59
+
60
+	checkSccs(d, d, fakeProject)
61
+
62
+	d.assertNoErrors()
63
+	d.dumpMessages()
64
+}
0 65
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/util/sets"
8
+)
9
+
10
+var serviceAccountNames = sets.NewString("logging-deployer", "aggregated-logging-kibana", "aggregated-logging-curator", "aggregated-logging-elasticsearch", fluentdServiceAccountName)
11
+
12
+const serviceAccountsMissing = `
13
+Did not find ServiceAccounts: %s.  The logging infrastructure will not function 
14
+properly without them.  You may need to re-run the installer.
15
+`
16
+
17
+func checkServiceAccounts(d diagnosticReporter, f saAdapter, project string) {
18
+	d.Debug("AGL0500", fmt.Sprintf("Checking ServiceAccounts in project '%s'...", project))
19
+	saList, err := f.serviceAccounts(project, kapi.ListOptions{})
20
+	if err != nil {
21
+		d.Error("AGL0505", err, fmt.Sprintf("There was an error while trying to retrieve the ServiceAccounts for the AggregatedLogging stack: %s", err))
22
+		return
23
+	}
24
+	foundNames := sets.NewString()
25
+	for _, sa := range saList.Items {
26
+		foundNames.Insert(sa.ObjectMeta.Name)
27
+	}
28
+	missing := sets.NewString()
29
+	for _, name := range serviceAccountNames.List() {
30
+		if !foundNames.Has(name) {
31
+			missing.Insert(name)
32
+		}
33
+	}
34
+	if missing.Len() != 0 {
35
+		d.Error("AGL0515", nil, fmt.Sprintf(serviceAccountsMissing, strings.Join(missing.List(), ",")))
36
+	}
37
+}
0 38
new file mode 100644
... ...
@@ -0,0 +1,64 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+
8
+	"github.com/openshift/origin/pkg/diagnostics/log"
9
+)
10
+
11
+type mockServiceAccountDiagnostic struct {
12
+	accounts kapi.ServiceAccountList
13
+	fakeDiagnostic
14
+}
15
+
16
+func newMockServiceAccountDiagnostic(t *testing.T) *mockServiceAccountDiagnostic {
17
+	return &mockServiceAccountDiagnostic{
18
+		accounts:       kapi.ServiceAccountList{},
19
+		fakeDiagnostic: *newFakeDiagnostic(t),
20
+	}
21
+}
22
+
23
+func (m *mockServiceAccountDiagnostic) serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) {
24
+	if m.err != nil {
25
+		return &m.accounts, m.err
26
+	}
27
+	return &m.accounts, nil
28
+}
29
+
30
+func (d *mockServiceAccountDiagnostic) addServiceAccountNamed(name string) {
31
+	meta := kapi.ObjectMeta{Name: name}
32
+	d.accounts.Items = append(d.accounts.Items, kapi.ServiceAccount{ObjectMeta: meta})
33
+}
34
+
35
+func TestCheckingServiceAccountsWhenFailedResponseFromClient(t *testing.T) {
36
+	d := newMockServiceAccountDiagnostic(t)
37
+	d.err = errors.New("Some Error")
38
+	checkServiceAccounts(d, d, fakeProject)
39
+	d.assertMessage("AGL0505",
40
+		"Exp an error when unable to retrieve service accounts because of a client error",
41
+		log.ErrorLevel)
42
+}
43
+
44
+func TestCheckingServiceAccountsWhenMissingExpectedServiceAccount(t *testing.T) {
45
+	d := newMockServiceAccountDiagnostic(t)
46
+	d.addServiceAccountNamed("foobar")
47
+
48
+	checkServiceAccounts(d, d, fakeProject)
49
+	d.assertMessage("AGL0515",
50
+		"Exp an error when an expected service account was not found.",
51
+		log.ErrorLevel)
52
+}
53
+
54
+func TestCheckingServiceAccountsIsOk(t *testing.T) {
55
+	d := newMockServiceAccountDiagnostic(t)
56
+
57
+	for _, name := range serviceAccountNames.List() {
58
+		d.addServiceAccountNamed(name)
59
+	}
60
+
61
+	checkServiceAccounts(d, d, fakeProject)
62
+	d.assertNoErrors()
63
+}
0 64
new file mode 100644
... ...
@@ -0,0 +1,61 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/util/sets"
8
+)
9
+
10
+var loggingServices = sets.NewString("logging-es", "logging-es-cluster", "logging-es-ops", "logging-es-ops-cluster", "logging-kibana", "logging-kibana-ops")
11
+
12
+const serviceNotFound = `
13
+Expected to find '%s' among the logging services for the project but did not.  
14
+`
15
+const serviceOpsNotFound = `
16
+Expected to find '%s' among the logging services for the project but did not. This
17
+may not matter if you chose not to install a separate logging stack to support operations.
18
+`
19
+
20
+// checkServices looks to see if the aggregated logging services exist
21
+func checkServices(r diagnosticReporter, adapter servicesAdapter, project string) {
22
+	r.Debug("AGL0200", fmt.Sprintf("Checking for services in project '%s' with selector '%s'", project, loggingSelector.AsSelector()))
23
+	serviceList, err := adapter.services(project, kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
24
+	if err != nil {
25
+		r.Error("AGL0205", err, fmt.Sprintf("There was an error while trying to retrieve the logging services: %s", err))
26
+		return
27
+	}
28
+	foundServices := sets.NewString()
29
+	for _, service := range serviceList.Items {
30
+		foundServices.Insert(service.ObjectMeta.Name)
31
+		r.Debug("AGL0210", fmt.Sprintf("Retrieved service '%s'", service.ObjectMeta.Name))
32
+	}
33
+	for _, service := range loggingServices.List() {
34
+		if foundServices.Has(service) {
35
+			checkServiceEndpoints(r, adapter, project, service)
36
+		} else {
37
+			if strings.Contains(service, "-ops") {
38
+				r.Warn("AGL0215", nil, fmt.Sprintf(serviceOpsNotFound, service))
39
+			} else {
40
+				r.Error("AGL0217", nil, fmt.Sprintf(serviceNotFound, service))
41
+			}
42
+		}
43
+	}
44
+}
45
+
46
+// checkServiceEndpoints validates if there is an available endpoint for the service.
47
+func checkServiceEndpoints(r diagnosticReporter, adapter servicesAdapter, project string, service string) {
48
+	endpoints, err := adapter.endpointsForService(project, service)
49
+	if err != nil {
50
+		r.Warn("AGL0220", err, fmt.Sprintf("Unable to retrieve endpoints for service '%s': %s", service, err))
51
+		return
52
+	}
53
+	if len(endpoints.Subsets) == 0 {
54
+		if strings.Contains(service, "-ops") {
55
+			r.Info("AGL0223", fmt.Sprintf("There are no endpoints found for service '%s'. This could mean you chose not to install a separate operations cluster during installation.", service))
56
+		} else {
57
+			r.Warn("AGL0225", nil, fmt.Sprintf("There are no endpoints found for service '%s'. This means no pods are backing this service and the component is not functioning.", service))
58
+		}
59
+	}
60
+}
0 61
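
The reporting levels in checkServices and checkServiceEndpoints follow a small decision table: trouble with a "-ops" service is downgraded because the separate operations stack is optional, while the same trouble on a core service is not. A sketch of that table as a pure function; the function and its name are illustrative, not part of the diagnostic:

    package main

    import (
    	"fmt"
    	"strings"
    )

    // serviceSeverity condenses the level selection used by checkServices and
    // checkServiceEndpoints: "-ops" components degrade to info/warn, core
    // components to warn/error.
    func serviceSeverity(name string, found bool, hasEndpoints bool) string {
    	ops := strings.Contains(name, "-ops")
    	switch {
    	case !found && ops:
    		return "warn" // AGL0215
    	case !found:
    		return "error" // AGL0217
    	case !hasEndpoints && ops:
    		return "info" // AGL0223
    	case !hasEndpoints:
    		return "warn" // AGL0225
    	default:
    		return "ok"
    	}
    }

    func main() {
    	fmt.Println(serviceSeverity("logging-kibana-ops", true, false)) // info
    	fmt.Println(serviceSeverity("logging-es", false, false))        // error
    }
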
new file mode 100644
... ...
@@ -0,0 +1,104 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	"github.com/openshift/origin/pkg/diagnostics/log"
7
+	kapi "k8s.io/kubernetes/pkg/api"
8
+)
9
+
10
+type fakeServicesDiagnostic struct {
11
+	list kapi.ServiceList
12
+	fakeDiagnostic
13
+	endpoints   map[string]kapi.Endpoints
14
+	endpointErr error
15
+}
16
+
17
+func newFakeServicesDiagnostic(t *testing.T) *fakeServicesDiagnostic {
18
+	return &fakeServicesDiagnostic{
19
+		list:           kapi.ServiceList{},
20
+		fakeDiagnostic: *newFakeDiagnostic(t),
21
+		endpoints:      map[string]kapi.Endpoints{},
22
+	}
23
+}
24
+
25
+func (f *fakeServicesDiagnostic) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
26
+	if f.err != nil {
27
+		return &f.list, f.err
28
+	}
29
+	return &f.list, nil
30
+}
31
+func (f *fakeServicesDiagnostic) endpointsForService(project string, service string) (*kapi.Endpoints, error) {
32
+	if f.endpointErr != nil {
33
+		return nil, f.endpointErr
34
+	}
35
+	endpoints := f.endpoints[service]
36
+	return &endpoints, nil
37
+}
38
+
39
+func (f *fakeServicesDiagnostic) addEndpointSubsetTo(service string) {
40
+	endpoints := kapi.Endpoints{}
41
+	endpoints.Subsets = []kapi.EndpointSubset{{}}
42
+	f.endpoints[service] = endpoints
43
+}
44
+
45
+func (f *fakeServicesDiagnostic) addServiceNamed(name string) {
46
+	meta := kapi.ObjectMeta{Name: name}
47
+	f.list.Items = append(f.list.Items, kapi.Service{ObjectMeta: meta})
48
+}
49
+
50
+// test error from client
51
+func TestCheckingServicesWhenFailedResponseFromClient(t *testing.T) {
52
+	d := newFakeServicesDiagnostic(t)
53
+	d.err = errors.New("an error")
54
+	checkServices(d, d, fakeProject)
55
+	d.assertMessage("AGL0205",
56
+		"Exp an error when unable to retrieve services because of a client error",
57
+		log.ErrorLevel)
58
+}
59
+
60
+func TestCheckingServicesWhenMissingServices(t *testing.T) {
61
+	d := newFakeServicesDiagnostic(t)
62
+	d.addServiceNamed("logging-es")
63
+
64
+	checkServices(d, d, fakeProject)
65
+	d.assertMessage("AGL0215",
66
+		"Exp a warning when an expected service is not found",
67
+		log.WarnLevel)
68
+}
69
+
70
+func TestCheckingServicesWarnsWhenRetrievingEndpointsErrors(t *testing.T) {
71
+	d := newFakeServicesDiagnostic(t)
72
+	d.addServiceNamed("logging-es")
73
+	d.endpointErr = errors.New("an endpoint error")
74
+
75
+	checkServices(d, d, fakeProject)
76
+	d.assertMessage("AGL0220",
77
+		"Exp a warning when there is an error retrieving endpoints for a service",
78
+		log.WarnLevel)
79
+}
80
+
81
+func TestCheckingServicesWarnsWhenServiceHasNoEndpoints(t *testing.T) {
82
+	d := newFakeServicesDiagnostic(t)
83
+	for _, service := range loggingServices.List() {
84
+		d.addServiceNamed(service)
85
+	}
86
+
87
+	checkServices(d, d, fakeProject)
88
+	d.assertMessage("AGL0225",
89
+		"Exp a warning when an expected service has no endpoints",
90
+		log.WarnLevel)
91
+}
92
+
93
+func TestCheckingServicesHasNoErrorsOrWarningsForExpServices(t *testing.T) {
94
+	d := newFakeServicesDiagnostic(t)
95
+	for _, service := range loggingServices.List() {
96
+		d.addServiceNamed(service)
97
+		d.addEndpointSubsetTo(service)
98
+	}
99
+
100
+	checkServices(d, d, fakeProject)
101
+	d.assertNoErrors()
102
+	d.assertNoWarnings()
103
+}