
[origin-aggregated-logging 207] Add diagnostics for aggregated logging

Jeff Cantrill authored on 2016/08/23 06:13:02
Showing 27 changed files
... ...
@@ -4,6 +4,7 @@
4 4
 /.project
5 5
 /.vagrant
6 6
 /.vscode
7
+/.settings
7 8
 /cpu.pprof
8 9
 /assets/app/config.local.js
9 10
 /assets/nbproject
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+oadm diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+oc adm diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+openshift admin diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+openshift cli adm diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
53 53
 are skipped.
54 54
 .PP
55 55
 Diagnostics may be run individually by passing diagnostic names as arguments.
56
+
57
+.PP
58
+.RS
59
+
60
+.nf
61
+openshift ex diagnostics <DiagnosticName>
62
+
63
+.fi
64
+.RE
65
+
66
+.PP
56 67
 The available diagnostic names are:
57
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
58 69
 
59 70
 
60 71
 .SH OPTIONS
... ...
@@ -14,13 +14,24 @@ import (
14 14
 	"github.com/openshift/origin/pkg/client"
15 15
 	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
16 16
 	clustdiags "github.com/openshift/origin/pkg/diagnostics/cluster"
17
+	agldiags "github.com/openshift/origin/pkg/diagnostics/cluster/aggregated_logging"
17 18
 	"github.com/openshift/origin/pkg/diagnostics/types"
18 19
 )
19 20
 
20 21
 var (
21 22
 	// availableClusterDiagnostics contains the names of cluster diagnostics that can be executed
22 23
 	// during a single run of diagnostics. Add more diagnostics to the list as they are defined.
23
-	availableClusterDiagnostics = sets.NewString(clustdiags.NodeDefinitionsName, clustdiags.ClusterRegistryName, clustdiags.ClusterRouterName, clustdiags.ClusterRolesName, clustdiags.ClusterRoleBindingsName, clustdiags.MasterNodeName, clustdiags.MetricsApiProxyName, clustdiags.ServiceExternalIPsName)
24
+	availableClusterDiagnostics = sets.NewString(
25
+		agldiags.AggregatedLoggingName,
26
+		clustdiags.ClusterRegistryName,
27
+		clustdiags.ClusterRouterName,
28
+		clustdiags.ClusterRolesName,
29
+		clustdiags.ClusterRoleBindingsName,
30
+		clustdiags.MasterNodeName,
31
+		clustdiags.MetricsApiProxyName,
32
+		clustdiags.NodeDefinitionsName,
33
+		clustdiags.ServiceExternalIPsName,
34
+	)
24 35
 )
25 36
 
26 37
 // buildClusterDiagnostics builds cluster Diagnostic objects if a cluster-admin client can be extracted from the rawConfig passed in.
... ...
@@ -46,6 +57,8 @@ func (o DiagnosticsOptions) buildClusterDiagnostics(rawConfig *clientcmdapi.Conf
46 46
 	for _, diagnosticName := range requestedDiagnostics {
47 47
 		var d types.Diagnostic
48 48
 		switch diagnosticName {
49
+		case agldiags.AggregatedLoggingName:
50
+			d = agldiags.NewAggregatedLogging(o.MasterConfigLocation, kclusterClient, clusterClient)
49 51
 		case clustdiags.NodeDefinitionsName:
50 52
 			d = &clustdiags.NodeDefinitions{KubeClient: kclusterClient, OsClient: clusterClient}
51 53
 		case clustdiags.MasterNodeName:
... ...
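The two hunks above show the full pattern for wiring a new cluster diagnostic into the command: register its name in availableClusterDiagnostics, then add a case to the switch that constructs it. Below is a minimal, self-contained sketch of that same pattern; the MyDiagnostic name and type are invented for illustration and are not part of this change:

  package main

  import (
      "fmt"

      "k8s.io/kubernetes/pkg/util/sets"
  )

  // diagnostic stands in for types.Diagnostic in this sketch.
  type diagnostic interface {
      Name() string
  }

  type myDiagnostic struct{}

  func (myDiagnostic) Name() string { return "MyDiagnostic" }

  // Step 1: register the diagnostic name so it can be requested by name.
  var availableClusterDiagnostics = sets.NewString("MyDiagnostic")

  // Step 2: build the diagnostic when its name is requested.
  func buildDiagnostic(name string) (diagnostic, error) {
      if !availableClusterDiagnostics.Has(name) {
          return nil, fmt.Errorf("unknown diagnostic: %s", name)
      }
      switch name {
      case "MyDiagnostic":
          return myDiagnostic{}, nil
      }
      return nil, fmt.Errorf("no builder registered for: %s", name)
  }

  func main() {
      d, err := buildDiagnostic("MyDiagnostic")
      if err != nil {
          fmt.Println(err)
          return
      }
      fmt.Println("built diagnostic:", d.Name())
  }
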
@@ -80,6 +80,9 @@ you will receive an error if they are not found. For example:
80 80
   are skipped.
81 81
 
82 82
 Diagnostics may be run individually by passing diagnostic names as arguments.
83
+
84
+    %[1]s <DiagnosticName>
85
+    
83 86
 The available diagnostic names are:
84 87
 %[2]s
85 88
 `
... ...
@@ -95,7 +95,7 @@ save may be your own.
95 95
 
96 96
 A diagnostic is an object that conforms to the Diagnostic interface
97 97
 (see pkg/diagnostics/types/diagnostic.go). The diagnostic object should
98
-be built in one of the builders in the pkg/cmd/experimental/diagnostics
98
+be built in one of the builders in the pkg/cmd/admin/diagnostics
99 99
 package (based on whether it depends on client, cluster-admin, or host
100 100
 configuration). When executed, the diagnostic logs its findings into
101 101
 a result object. It should be assumed that they may run in parallel.
102 102
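As a minimal sketch of that contract, assuming the Diagnostic interface consists of the methods AggregatedLogging implements later in this change (Name, Description, CanRun, Check) and that types.NewDiagnosticResult constructs the result object; the ExampleDiagnostic name is hypothetical:

  package example

  import "github.com/openshift/origin/pkg/diagnostics/types"

  // ExampleDiagnostic is a hypothetical, no-op diagnostic used only to
  // illustrate the Diagnostic contract described above.
  type ExampleDiagnostic struct {
      result types.DiagnosticResult
  }

  func NewExampleDiagnostic() *ExampleDiagnostic {
      return &ExampleDiagnostic{result: types.NewDiagnosticResult("Example")}
  }

  func (d *ExampleDiagnostic) Name() string        { return "Example" }
  func (d *ExampleDiagnostic) Description() string { return "A no-op example diagnostic" }

  // CanRun reports whether prerequisites (clients, config files) are available.
  func (d *ExampleDiagnostic) CanRun() (bool, error) { return true, nil }

  // Check logs findings into the result object and returns it; diagnostics
  // should assume they may run in parallel.
  func (d *ExampleDiagnostic) Check() types.DiagnosticResult {
      d.result.Info("EX0001", "Nothing to check; reporting success.")
      return d.result
  }
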
new file mode 100644
... ...
@@ -0,0 +1,41 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+
5
+	"k8s.io/kubernetes/pkg/apis/rbac"
6
+	"k8s.io/kubernetes/pkg/util/sets"
7
+)
8
+
9
+const clusterReaderRoleBindingName = "cluster-readers"
10
+
11
+var clusterReaderRoleBindingNames = sets.NewString(fluentdServiceAccountName)
12
+
13
+const clusterReaderUnboundServiceAccount = `
14
+The ServiceAccount '%[1]s' is not a cluster-reader in the '%[2]s' project.  This
15
+is required to enable Fluentd to look up pod metadata for the logs it gathers.
16
+As a user with a cluster-admin role, you can grant the permissions by running
17
+the following:
18
+
19
+  oadm policy add-cluster-role-to-user cluster-reader system:serviceaccount:%[2]s:%[1]s
20
+`
21
+
22
+func checkClusterRoleBindings(r diagnosticReporter, adapter clusterRoleBindingsAdapter, project string) {
23
+	r.Debug("AGL0600", "Checking ClusterRoleBindings...")
24
+	crb, err := adapter.getClusterRoleBinding(clusterReaderRoleBindingName)
25
+	if err != nil {
26
+		r.Error("AGL0605", err, fmt.Sprintf("There was an error while trying to retrieve the ClusterRoleBindings for the logging stack: %s", err))
27
+		return
28
+	}
29
+	boundServiceAccounts := sets.NewString()
30
+	for _, subject := range crb.Subjects {
31
+		if subject.Kind == rbac.ServiceAccountKind && subject.Namespace == project {
32
+			boundServiceAccounts.Insert(subject.Name)
33
+		}
34
+	}
35
+	for _, name := range clusterReaderRoleBindingNames.List() {
36
+		if !boundServiceAccounts.Has(name) {
37
+			r.Error("AGL0610", nil, fmt.Sprintf(clusterReaderUnboundServiceAccount, name, project))
38
+		}
39
+	}
40
+}
0 41
new file mode 100644
... ...
@@ -0,0 +1,70 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/apis/rbac"
8
+
9
+	authapi "github.com/openshift/origin/pkg/authorization/api"
10
+	"github.com/openshift/origin/pkg/diagnostics/log"
11
+)
12
+
13
+type fakeRoleBindingDiagnostic struct {
14
+	fakeDiagnostic
15
+	fakeClusterRoleBinding authapi.ClusterRoleBinding
16
+}
17
+
18
+func newFakeRoleBindingDiagnostic(t *testing.T) *fakeRoleBindingDiagnostic {
19
+	return &fakeRoleBindingDiagnostic{
20
+		fakeDiagnostic: *newFakeDiagnostic(t),
21
+	}
22
+}
23
+
24
+func (f *fakeRoleBindingDiagnostic) getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) {
25
+	if f.err != nil {
26
+		return nil, f.err
27
+	}
28
+	return &f.fakeClusterRoleBinding, nil
29
+}
30
+func (f *fakeRoleBindingDiagnostic) addBinding(name string, namespace string) {
31
+	ref := kapi.ObjectReference{
32
+		Name:      name,
33
+		Kind:      rbac.ServiceAccountKind,
34
+		Namespace: namespace,
35
+	}
36
+	f.fakeClusterRoleBinding.Subjects = append(f.fakeClusterRoleBinding.Subjects, ref)
37
+}
38
+
39
+//test an error is reported when the client errors while retrieving ClusterRoleBindings
40
+func TestCheckClusterRoleBindingsWhenErrorFromClientRetrievingRoles(t *testing.T) {
41
+	d := newFakeRoleBindingDiagnostic(t)
42
+	d.err = errors.New("client error")
43
+
44
+	checkClusterRoleBindings(d, d, fakeProject)
45
+
46
+	d.assertMessage("AGL0605", "Exp. an error message if client error retrieving ClusterRoleBindings", log.ErrorLevel)
47
+	d.dumpMessages()
48
+}
49
+
50
+func TestCheckClusterRoleBindingsWhenClusterReaderIsNotInProject(t *testing.T) {
51
+	d := newFakeRoleBindingDiagnostic(t)
52
+	d.addBinding("someName", "someRandomProject")
53
+	d.addBinding(fluentdServiceAccountName, fakeProject)
54
+
55
+	checkClusterRoleBindings(d, d, fakeProject)
56
+
57
+	d.assertNoErrors()
58
+	d.dumpMessages()
59
+}
60
+
61
+func TestCheckClusterRoleBindingsWhenUnboundServiceAccounts(t *testing.T) {
62
+	d := newFakeRoleBindingDiagnostic(t)
63
+	d.addBinding(fluentdServiceAccountName, "someRandomProject")
64
+
65
+	checkClusterRoleBindings(d, d, fakeProject)
66
+
67
+	d.assertMessage("AGL0610", "Exp. an error when the expected service accounts don't have cluster-reader access", log.ErrorLevel)
68
+	d.dumpMessages()
69
+}
0 70
new file mode 100644
... ...
@@ -0,0 +1,118 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+
5
+	kapi "k8s.io/kubernetes/pkg/api"
6
+	kapisext "k8s.io/kubernetes/pkg/apis/extensions"
7
+	"k8s.io/kubernetes/pkg/labels"
8
+)
9
+
10
+const daemonSetNoLabeledNodes = `
11
+There are no nodes that match the selector for DaemonSet '%[1]s'. This
12
+means Fluentd is not running and is not gathering logs from any nodes.
13
+An example of a command to target a specific node for this DaemonSet:
14
+
15
+  oc label node/node1.example.com %[2]s
16
+
17
+or to label them all:
18
+
19
+  oc label node --all %[2]s
20
+`
21
+
22
+const daemonSetPartialNodesLabeled = `
23
+Only some of the nodes match the selector for DaemonSet '%s'.
24
+A list of matching nodes can be discovered by running:
25
+
26
+  oc get nodes -l %s
27
+`
28
+const daemonSetNoPodsFound = `
29
+There were no pods found that match DaemonSet '%s' with matchLabels '%s'
30
+`
31
+const daemonSetPodsNotRunning = `
32
+The Pod '%[1]s' matched by DaemonSet '%[2]s' is not in '%[3]s' status: %[4]s. 
33
+
34
+Depending upon the state, this could mean there is an error running the image 
35
+for one or more pod containers, the node could be pulling images, etc.  Try running
36
+the following commands to get additional information:
37
+
38
+  oc describe pod %[1]s -n %[5]s
39
+  oc logs %[1]s -n %[5]s
40
+  oc get events -n %[5]s
41
+`
42
+const daemonSetNotFound = `
43
+There were no DaemonSets in project '%s' that included label '%s'.  This implies
44
+the Fluentd pods are not deployed or the logging stack needs to be upgraded.  Try
45
+running the installer to upgrade the logging stack.
46
+`
47
+
48
+var loggingInfraFluentdSelector = labels.Set{loggingInfraKey: "fluentd"}
49
+
50
+func checkDaemonSets(r diagnosticReporter, adapter daemonsetAdapter, project string) {
51
+	r.Debug("AGL0400", fmt.Sprintf("Checking DaemonSets in project '%s'...", project))
52
+	dsList, err := adapter.daemonsets(project, kapi.ListOptions{LabelSelector: loggingInfraFluentdSelector.AsSelector()})
53
+	if err != nil {
54
+		r.Error("AGL0405", err, fmt.Sprintf("There was an error while trying to retrieve the logging DaemonSets in project '%s' which is most likely transient: %s", project, err))
55
+		return
56
+	}
57
+	if len(dsList.Items) == 0 {
58
+		r.Error("AGL0407", err, fmt.Sprintf(daemonSetNotFound, project, loggingInfraFluentdSelector.AsSelector()))
59
+		return
60
+	}
61
+	nodeList, err := adapter.nodes(kapi.ListOptions{})
62
+	if err != nil {
63
+		r.Error("AGL0410", err, fmt.Sprintf("There was an error while trying to retrieve the list of Nodes which is most likely transient: %s", err))
64
+		return
65
+	}
66
+	for _, ds := range dsList.Items {
67
+		labeled := 0
68
+		nodeSelector := labels.Set(ds.Spec.Template.Spec.NodeSelector).AsSelector()
69
+		r.Debug("AGL0415", fmt.Sprintf("Checking DaemonSet '%s' nodeSelector '%s'", ds.ObjectMeta.Name, nodeSelector))
70
+		for _, node := range nodeList.Items {
71
+			if nodeSelector.Matches(labels.Set(node.Labels)) {
72
+				labeled = labeled + 1
73
+			}
74
+		}
75
+		switch {
76
+		case labeled == 0:
77
+			r.Error("AGL0420", nil, fmt.Sprintf(daemonSetNoLabeledNodes, ds.ObjectMeta.Name, nodeSelector))
78
+			break
79
+		case labeled < len(nodeList.Items):
80
+			r.Warn("AGL0425", nil, fmt.Sprintf(daemonSetPartialNodesLabeled, ds.ObjectMeta.Name, nodeSelector))
81
+			break
82
+		default:
83
+			r.Debug("AGL0430", fmt.Sprintf("DaemonSet '%s' matches all nodes", ds.ObjectMeta.Name))
84
+		}
85
+		if labeled > 0 {
86
+			checkDaemonSetPods(r, adapter, ds, project, labeled)
87
+		}
88
+	}
89
+}
90
+
91
+func checkDaemonSetPods(r diagnosticReporter, adapter daemonsetAdapter, ds kapisext.DaemonSet, project string, numLabeledNodes int) {
92
+	if ds.Spec.Selector == nil {
93
+		r.Debug("AGL0455", "DaemonSet selector is nil. Unable to verify a pod is running")
94
+		return
95
+	}
96
+	podSelector := labels.Set(ds.Spec.Selector.MatchLabels).AsSelector()
97
+	r.Debug("AGL0435", fmt.Sprintf("Checking for running pods for DaemonSet '%s' with matchLabels '%s'", ds.ObjectMeta.Name, podSelector))
98
+	podList, err := adapter.pods(project, kapi.ListOptions{LabelSelector: podSelector})
99
+	if err != nil {
100
+		r.Error("AGL0438", err, fmt.Sprintf("There was an error retrieving pods matched to DaemonSet '%s' that is most likely transient: %s", ds.ObjectMeta.Name, err))
101
+		return
102
+	}
103
+	if len(podList.Items) == 0 {
104
+		r.Error("AGL0440", nil, fmt.Sprintf(daemonSetNoPodsFound, ds.ObjectMeta.Name, podSelector))
105
+		return
106
+	}
107
+	if len(podList.Items) != numLabeledNodes {
108
+		r.Error("AGL0443", nil, fmt.Sprintf("The number of deployed pods %d does not match the number of labeled nodes %d", len(podList.Items), numLabeledNodes))
109
+	}
110
+	for _, pod := range podList.Items {
111
+		if pod.Status.Phase != kapi.PodRunning {
112
+			podName := pod.ObjectMeta.Name
113
+			r.Error("AGL0445", nil, fmt.Sprintf(daemonSetPodsNotRunning, podName, ds.ObjectMeta.Name, kapi.PodRunning, pod.Status.Phase, project))
114
+		}
115
+
116
+	}
117
+}
0 118
new file mode 100644
... ...
@@ -0,0 +1,188 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/api/unversioned"
8
+	kapisext "k8s.io/kubernetes/pkg/apis/extensions"
9
+
10
+	"github.com/openshift/origin/pkg/diagnostics/log"
11
+)
12
+
13
+const (
14
+	testPodsKey  = "pods"
15
+	testNodesKey = "nodes"
16
+	testDsKey    = "daemonsets"
17
+)
18
+
19
+type fakeDaemonSetDiagnostic struct {
20
+	fakeDiagnostic
21
+	fakePods       kapi.PodList
22
+	fakeNodes      kapi.NodeList
23
+	fakeDaemonsets kapisext.DaemonSetList
24
+	clienterrors   map[string]error
25
+}
26
+
27
+func newFakeDaemonSetDiagnostic(t *testing.T) *fakeDaemonSetDiagnostic {
28
+	return &fakeDaemonSetDiagnostic{
29
+		fakeDiagnostic: *newFakeDiagnostic(t),
30
+		clienterrors:   map[string]error{},
31
+	}
32
+}
33
+
34
+func (f *fakeDaemonSetDiagnostic) addDsPodWithPhase(state kapi.PodPhase) {
35
+	pod := kapi.Pod{
36
+		Spec: kapi.PodSpec{},
37
+		Status: kapi.PodStatus{
38
+			Phase: state,
39
+		},
40
+	}
41
+	f.fakePods.Items = append(f.fakePods.Items, pod)
42
+}
43
+
44
+func (f *fakeDaemonSetDiagnostic) addDaemonSetWithSelector(key string, value string) {
45
+	selector := map[string]string{key: value}
46
+	ds := kapisext.DaemonSet{
47
+		Spec: kapisext.DaemonSetSpec{
48
+			Template: kapi.PodTemplateSpec{
49
+				Spec: kapi.PodSpec{
50
+					NodeSelector: selector,
51
+				},
52
+			},
53
+			Selector: &unversioned.LabelSelector{MatchLabels: selector},
54
+		},
55
+	}
56
+	f.fakeDaemonsets.Items = append(f.fakeDaemonsets.Items, ds)
57
+}
58
+
59
+func (f *fakeDaemonSetDiagnostic) addNodeWithLabel(key string, value string) {
60
+	labels := map[string]string{key: value}
61
+	node := kapi.Node{
62
+		ObjectMeta: kapi.ObjectMeta{
63
+			Labels: labels,
64
+		},
65
+	}
66
+	f.fakeNodes.Items = append(f.fakeNodes.Items, node)
67
+}
68
+
69
+func (f *fakeDaemonSetDiagnostic) daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) {
70
+	value, ok := f.clienterrors[testDsKey]
71
+	if ok {
72
+		return nil, value
73
+	}
74
+	return &f.fakeDaemonsets, nil
75
+}
76
+
77
+func (f *fakeDaemonSetDiagnostic) nodes(options kapi.ListOptions) (*kapi.NodeList, error) {
78
+	value, ok := f.clienterrors[testNodesKey]
79
+	if ok {
80
+		return nil, value
81
+	}
82
+	return &f.fakeNodes, nil
83
+}
84
+
85
+func (f *fakeDaemonSetDiagnostic) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
86
+	value, ok := f.clienterrors[testPodsKey]
87
+	if ok {
88
+		return nil, value
89
+	}
90
+	return &f.fakePods, nil
91
+}
92
+
93
+func TestCheckDaemonsetsWhenErrorResponseFromClientRetrievingDaemonsets(t *testing.T) {
94
+	d := newFakeDaemonSetDiagnostic(t)
95
+	d.clienterrors[testDsKey] = errors.New("someerror")
96
+
97
+	checkDaemonSets(d, d, fakeProject)
98
+
99
+	d.assertMessage("AGL0405", "Exp. error when client errors on retrieving DaemonSets", log.ErrorLevel)
100
+}
101
+
102
+func TestCheckDaemonsetsWhenNoDaemonsetsFound(t *testing.T) {
103
+	d := newFakeDaemonSetDiagnostic(t)
104
+
105
+	checkDaemonSets(d, d, fakeProject)
106
+
107
+	d.assertMessage("AGL0407", "Exp. error when client retrieves no DaemonSets", log.ErrorLevel)
108
+}
109
+
110
+func TestCheckDaemonsetsWhenErrorResponseFromClientRetrievingNodes(t *testing.T) {
111
+	d := newFakeDaemonSetDiagnostic(t)
112
+	d.clienterrors[testNodesKey] = errors.New("someerror")
113
+	d.addDaemonSetWithSelector("foo", "bar")
114
+
115
+	checkDaemonSets(d, d, fakeProject)
116
+
117
+	d.assertMessage("AGL0410", "Exp. error when client errors on retrieving Nodes", log.ErrorLevel)
118
+}
119
+
120
+func TestCheckDaemonsetsWhenDaemonsetsMatchNoNodes(t *testing.T) {
121
+	d := newFakeDaemonSetDiagnostic(t)
122
+	d.addDaemonSetWithSelector("foo", "bar")
123
+	d.addNodeWithLabel("foo", "xyz")
124
+
125
+	checkDaemonSets(d, d, fakeProject)
126
+
127
+	d.assertMessage("AGL0420", "Exp. error when daemonsets do not match any nodes", log.ErrorLevel)
128
+}
129
+
130
+func TestCheckDaemonsetsWhenDaemonsetsMatchPartialNodes(t *testing.T) {
131
+	d := newFakeDaemonSetDiagnostic(t)
132
+	d.addDaemonSetWithSelector("foo", "bar")
133
+	d.addNodeWithLabel("foo", "bar")
134
+	d.addNodeWithLabel("foo", "xyz")
135
+
136
+	checkDaemonSets(d, d, fakeProject)
137
+
138
+	d.assertMessage("AGL0425", "Exp. a warning when a daemonset matches fewer than all the nodes", log.WarnLevel)
139
+}
140
+
141
+func TestCheckDaemonsetsWhenClientErrorsFetchingPods(t *testing.T) {
142
+	d := newFakeDaemonSetDiagnostic(t)
143
+	d.clienterrors[testPodsKey] = errors.New("some error")
144
+	d.addDaemonSetWithSelector("foo", "bar")
145
+	d.addNodeWithLabel("foo", "bar")
146
+
147
+	checkDaemonSets(d, d, fakeProject)
148
+
149
+	d.assertMessage("AGL0438", "Exp. error when there is an error retrieving pods for a daemonset", log.ErrorLevel)
150
+
151
+	d.dumpMessages()
152
+}
153
+
154
+func TestCheckDaemonsetsWhenNoPodsMatchDaemonSet(t *testing.T) {
155
+	d := newFakeDaemonSetDiagnostic(t)
156
+	d.addDaemonSetWithSelector("foo", "bar")
157
+	d.addNodeWithLabel("foo", "bar")
158
+
159
+	checkDaemonSets(d, d, fakeProject)
160
+
161
+	d.assertMessage("AGL0440", "Exp. error when there are no pods that match a daemonset", log.ErrorLevel)
162
+	d.dumpMessages()
163
+}
164
+
165
+func TestCheckDaemonsetsWhenNoPodsInRunningState(t *testing.T) {
166
+	d := newFakeDaemonSetDiagnostic(t)
167
+	d.addDaemonSetWithSelector("foo", "bar")
168
+	d.addNodeWithLabel("foo", "bar")
169
+	d.addDsPodWithPhase(kapi.PodPending)
170
+
171
+	checkDaemonSets(d, d, fakeProject)
172
+
173
+	d.assertMessage("AGL0445", "Exp. error when there are no pods in running state", log.ErrorLevel)
174
+	d.dumpMessages()
175
+}
176
+
177
+func TestCheckDaemonsetsWhenAllPodsInRunningState(t *testing.T) {
178
+	d := newFakeDaemonSetDiagnostic(t)
179
+	d.addDaemonSetWithSelector("foo", "bar")
180
+	d.addNodeWithLabel("foo", "bar")
181
+	d.addDsPodWithPhase(kapi.PodRunning)
182
+
183
+	checkDaemonSets(d, d, fakeProject)
184
+
185
+	d.assertNoErrors()
186
+	d.dumpMessages()
187
+}
0 188
new file mode 100644
... ...
@@ -0,0 +1,128 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/labels"
8
+	"k8s.io/kubernetes/pkg/selection"
9
+	"k8s.io/kubernetes/pkg/util/sets"
10
+
11
+	deployapi "github.com/openshift/origin/pkg/deploy/api"
12
+)
13
+
14
+const (
15
+	componentNameEs        = "es"
16
+	componentNameEsOps     = "es-ops"
17
+	componentNameKibana    = "kibana"
18
+	componentNameKibanaOps = "kibana-ops"
19
+	componentNameCurator   = "curator"
20
+)
21
+
22
+// loggingComponents are those 'managed' by rep controllers (e.g. fluentd is deployed with a DaemonSet)
23
+var loggingComponents = sets.NewString(componentNameEs, componentNameEsOps, componentNameKibana, componentNameKibanaOps, componentNameCurator)
24
+
25
+const deploymentConfigWarnMissingForOps = `
26
+Did not find a DeploymentConfig to support component '%s'.  If you require
27
+a separate ElasticSearch cluster to aggregate operations logs, please re-install
28
+or update logging and specify the appropriate switch to enable the ops cluster.
29
+`
30
+
31
+const deploymentConfigZeroPodsFound = `
32
+There were no Pods found that support logging.  Try running
33
+the following commands for additional information:
34
+
35
+  oc describe dc -n %[1]s
36
+  oc get events -n %[1]s
37
+`
38
+const deploymentConfigNoPodsFound = `
39
+There were no Pods found for DeploymentConfig '%[1]s'.  Try running
40
+the following commands for additional information:
41
+
42
+  oc describe dc %[1]s -n %[2]s
43
+  oc get events -n %[2]s
44
+`
45
+const deploymentConfigPodsNotRunning = `
46
+The Pod '%[1]s' matched by DeploymentConfig '%[2]s' is not in '%[3]s' status: %[4]s. 
47
+
48
+Depending upon the state, this could mean there is an error running the image 
49
+for one or more pod containers, the node could be pulling images, etc.  Try running
50
+the following commands for additional information:
51
+
52
+  oc describe pod %[1]s -n %[5]s
53
+  oc logs %[1]s -n %[5]s
54
+  oc get events -n %[5]s
55
+`
56
+
57
+func checkDeploymentConfigs(r diagnosticReporter, adapter deploymentConfigAdapter, project string) {
58
+	req, _ := labels.NewRequirement(loggingInfraKey, selection.Exists, nil)
59
+	selector := labels.NewSelector().Add(*req)
60
+	r.Debug("AGL0040", fmt.Sprintf("Checking for DeploymentConfigs in project '%s' with selector '%s'", project, selector))
61
+	dcList, err := adapter.deploymentconfigs(project, kapi.ListOptions{LabelSelector: selector})
62
+	if err != nil {
63
+		r.Error("AGL0045", err, fmt.Sprintf("There was an error while trying to retrieve the DeploymentConfigs in project '%s': %s", project, err))
64
+		return
65
+	}
66
+	if len(dcList.Items) == 0 {
67
+		r.Error("AGL0047", nil, fmt.Sprintf("Did not find any matching DeploymentConfigs in project '%s' which means no logging components were deployed.  Try running the installer.", project))
68
+		return
69
+	}
70
+	found := sets.NewString()
71
+	for _, entry := range dcList.Items {
72
+		comp := labels.Set(entry.ObjectMeta.Labels).Get(componentKey)
73
+		found.Insert(comp)
74
+		r.Debug("AGL0050", fmt.Sprintf("Found DeploymentConfig '%s' for component '%s'", entry.ObjectMeta.Name, comp))
75
+	}
76
+	for _, entry := range loggingComponents.List() {
77
+		exists := found.Has(entry)
78
+		if !exists {
79
+			if strings.HasSuffix(entry, "-ops") {
80
+				r.Info("AGL0060", fmt.Sprintf(deploymentConfigWarnMissingForOps, entry))
81
+			} else {
82
+				r.Error("AGL0065", nil, fmt.Sprintf("Did not find a DeploymentConfig to support component '%s'", entry))
83
+			}
84
+		}
85
+	}
86
+	checkDeploymentConfigPods(r, adapter, *dcList, project)
87
+}
88
+
89
+func checkDeploymentConfigPods(r diagnosticReporter, adapter deploymentConfigAdapter, dcs deployapi.DeploymentConfigList, project string) {
90
+	compReq, _ := labels.NewRequirement(componentKey, selection.In, loggingComponents)
91
+	provReq, _ := labels.NewRequirement(providerKey, selection.Equals, sets.NewString(openshiftValue))
92
+	podSelector := labels.NewSelector().Add(*compReq, *provReq)
93
+	r.Debug("AGL0070", fmt.Sprintf("Getting pods that match selector '%s'", podSelector))
94
+	podList, err := adapter.pods(project, kapi.ListOptions{LabelSelector: podSelector})
95
+	if err != nil {
96
+		r.Error("AGL0075", err, fmt.Sprintf("There was an error while trying to retrieve the pods for the AggregatedLogging stack: %s", err))
97
+		return
98
+	}
99
+	if len(podList.Items) == 0 {
100
+		r.Error("AGL0080", nil, fmt.Sprintf(deploymentConfigZeroPodsFound, project))
101
+		return
102
+	}
103
+	dcPodCount := make(map[string]int, len(dcs.Items))
104
+	for _, dc := range dcs.Items {
105
+		dcPodCount[dc.ObjectMeta.Name] = 0
106
+	}
107
+
108
+	for _, pod := range podList.Items {
109
+		r.Debug("AGL0082", fmt.Sprintf("Checking status of Pod '%s'...", pod.ObjectMeta.Name))
110
+		dcName, hasDcName := pod.ObjectMeta.Annotations[deployapi.DeploymentConfigAnnotation]
111
+		if !hasDcName {
112
+			r.Warn("AGL0085", nil, fmt.Sprintf("Found Pod '%s' that does not reference a logging deployment config, which may be acceptable. Skipping check to see if it is running.", pod.ObjectMeta.Name))
113
+			continue
114
+		}
115
+		if pod.Status.Phase != kapi.PodRunning {
116
+			podName := pod.ObjectMeta.Name
117
+			r.Error("AGL0090", nil, fmt.Sprintf(deploymentConfigPodsNotRunning, podName, dcName, kapi.PodRunning, pod.Status.Phase, project))
118
+		}
119
+		count := dcPodCount[dcName]
120
+		dcPodCount[dcName] = count + 1
121
+	}
122
+	for name, count := range dcPodCount {
123
+		if count == 0 {
124
+			r.Error("AGL0095", nil, fmt.Sprintf(deploymentConfigNoPodsFound, name, project))
125
+		}
126
+	}
127
+}
0 128
new file mode 100644
... ...
@@ -0,0 +1,153 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+
8
+	deployapi "github.com/openshift/origin/pkg/deploy/api"
9
+	"github.com/openshift/origin/pkg/diagnostics/log"
10
+)
11
+
12
+const (
13
+	testDcPodsKey      = "pods"
14
+	testDcKey          = "deploymentconfigs"
15
+	testSkipAnnotation = "skipAddAnnotation"
16
+)
17
+
18
+type fakeDeploymentConfigsDiagnostic struct {
19
+	fakeDiagnostic
20
+	fakePods     kapi.PodList
21
+	fakeDcs      deployapi.DeploymentConfigList
22
+	clienterrors map[string]error
23
+}
24
+
25
+func newFakeDeploymentConfigsDiagnostic(t *testing.T) *fakeDeploymentConfigsDiagnostic {
26
+	return &fakeDeploymentConfigsDiagnostic{
27
+		fakeDiagnostic: *newFakeDiagnostic(t),
28
+		clienterrors:   map[string]error{},
29
+	}
30
+}
31
+func (f *fakeDeploymentConfigsDiagnostic) addDeployConfigFor(component string) {
32
+	labels := map[string]string{componentKey: component}
33
+	dc := deployapi.DeploymentConfig{
34
+		ObjectMeta: kapi.ObjectMeta{
35
+			Name:   component + "Name",
36
+			Labels: labels,
37
+		},
38
+	}
39
+	f.fakeDcs.Items = append(f.fakeDcs.Items, dc)
40
+}
41
+
42
+func (f *fakeDeploymentConfigsDiagnostic) addPodFor(comp string, state kapi.PodPhase) {
43
+	annotations := map[string]string{}
44
+	if comp != testSkipAnnotation {
45
+		annotations[deployapi.DeploymentConfigAnnotation] = comp
46
+	}
47
+	pod := kapi.Pod{
48
+		ObjectMeta: kapi.ObjectMeta{
49
+			Name:        comp,
50
+			Annotations: annotations,
51
+		},
52
+		Spec: kapi.PodSpec{},
53
+		Status: kapi.PodStatus{
54
+			Phase: state,
55
+		},
56
+	}
57
+	f.fakePods.Items = append(f.fakePods.Items, pod)
58
+}
59
+
60
+func (f *fakeDeploymentConfigsDiagnostic) deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) {
61
+	f.test.Logf(">> calling deploymentconfigs: %s", f.clienterrors)
62
+	value, ok := f.clienterrors[testDcKey]
63
+	if ok {
64
+		f.test.Logf(">> error key found..returning %s", value)
65
+		return nil, value
66
+	}
67
+	f.test.Logf(">> error key not found..")
68
+	return &f.fakeDcs, nil
69
+}
70
+
71
+func (f *fakeDeploymentConfigsDiagnostic) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
72
+	value, ok := f.clienterrors[testDcPodsKey]
73
+	if ok {
74
+		return nil, value
75
+	}
76
+	return &f.fakePods, nil
77
+}
78
+
79
+//test client error listing dcs
80
+func TestCheckDcWhenErrorResponseFromClientRetrievingDc(t *testing.T) {
81
+	d := newFakeDeploymentConfigsDiagnostic(t)
82
+	d.clienterrors[testDcKey] = errors.New("error")
83
+
84
+	checkDeploymentConfigs(d, d, fakeProject)
85
+
86
+	d.assertMessage("AGL0045", "Exp. an error when client returns error retrieving dcs", log.ErrorLevel)
87
+	d.dumpMessages()
88
+}
89
+
90
+func TestCheckDcWhenNoDeployConfigsFound(t *testing.T) {
91
+	d := newFakeDeploymentConfigsDiagnostic(t)
92
+
93
+	checkDeploymentConfigs(d, d, fakeProject)
94
+
95
+	d.assertMessage("AGL0047", "Exp. an error when no DeploymentConfigs are found", log.ErrorLevel)
96
+	d.dumpMessages()
97
+}
98
+
99
+func TestCheckDcWhenOpsOrOtherDeployConfigsMissing(t *testing.T) {
100
+	d := newFakeDeploymentConfigsDiagnostic(t)
101
+	d.addDeployConfigFor(componentNameEs)
102
+
103
+	checkDeploymentConfigs(d, d, fakeProject)
104
+
105
+	d.assertMessage("AGL0060", "Exp. an info message when ops DeploymentConfigs are missing", log.InfoLevel)
106
+	d.assertMessage("AGL0065", "Exp. an error when non-ops DeploymentConfigs are missing", log.ErrorLevel)
107
+	d.dumpMessages()
108
+}
109
+
110
+func TestCheckDcWhenClientErrorListingPods(t *testing.T) {
111
+	d := newFakeDeploymentConfigsDiagnostic(t)
112
+	d.clienterrors[testDcPodsKey] = errors.New("New pod error")
113
+	for _, comp := range loggingComponents.List() {
114
+		d.addDeployConfigFor(comp)
115
+	}
116
+
117
+	checkDeploymentConfigs(d, d, fakeProject)
118
+
119
+	d.assertMessage("AGL0075", "Exp. an error when retrieving pods errors", log.ErrorLevel)
120
+	d.dumpMessages()
121
+}
122
+
123
+func TestCheckDcWhenNoPodsFoundMatchingDeployConfig(t *testing.T) {
124
+	d := newFakeDeploymentConfigsDiagnostic(t)
125
+	for _, comp := range loggingComponents.List() {
126
+		d.addDeployConfigFor(comp)
127
+	}
128
+
129
+	checkDeploymentConfigs(d, d, fakeProject)
130
+
131
+	d.assertMessage("AGL0080", "Exp. an error when no pods are found for the logging DeploymentConfigs", log.ErrorLevel)
132
+	d.dumpMessages()
133
+}
134
+
135
+func TestCheckDcWhenInVariousStates(t *testing.T) {
136
+	d := newFakeDeploymentConfigsDiagnostic(t)
137
+	for _, comp := range loggingComponents.List() {
138
+		d.addDeployConfigFor(comp)
139
+		d.addPodFor(comp, kapi.PodRunning)
140
+	}
141
+	d.addPodFor(testSkipAnnotation, kapi.PodRunning)
142
+	d.addPodFor("someothercomponent", kapi.PodPending)
143
+	d.addDeployConfigFor("somerandom component")
144
+
145
+	checkDeploymentConfigs(d, d, fakeProject)
146
+
147
+	d.assertMessage("AGL0085", "Exp. a warning when pod is missing DeployConfig annotation", log.WarnLevel)
148
+	d.assertMessage("AGL0090", "Exp. an error when pod is not in running state", log.ErrorLevel)
149
+	d.assertMessage("AGL0095", "Exp. an error when pods not found for a DeployConfig", log.ErrorLevel)
150
+
151
+	d.dumpMessages()
152
+}
0 153
new file mode 100644
... ...
@@ -0,0 +1,207 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"fmt"
5
+	"net/url"
6
+
7
+	kapi "k8s.io/kubernetes/pkg/api"
8
+	kapisext "k8s.io/kubernetes/pkg/apis/extensions"
9
+	kclient "k8s.io/kubernetes/pkg/client/unversioned"
10
+	"k8s.io/kubernetes/pkg/labels"
11
+
12
+	authapi "github.com/openshift/origin/pkg/authorization/api"
13
+	"github.com/openshift/origin/pkg/client"
14
+	configapi "github.com/openshift/origin/pkg/cmd/server/api"
15
+	deployapi "github.com/openshift/origin/pkg/deploy/api"
16
+	hostdiag "github.com/openshift/origin/pkg/diagnostics/host"
17
+	"github.com/openshift/origin/pkg/diagnostics/types"
18
+	routesapi "github.com/openshift/origin/pkg/route/api"
19
+)
20
+
21
+// AggregatedLogging is a Diagnostic to check the configurations
22
+// and general integration of the OpenShift stack
23
+// for aggregating container logs
24
+// https://github.com/openshift/origin-aggregated-logging
25
+type AggregatedLogging struct {
26
+	masterConfig     *configapi.MasterConfig
27
+	MasterConfigFile string
28
+	OsClient         *client.Client
29
+	KubeClient       *kclient.Client
30
+	result           types.DiagnosticResult
31
+}
32
+
33
+const (
34
+	AggregatedLoggingName = "AggregatedLogging"
35
+
36
+	loggingInfraKey = "logging-infra"
37
+	componentKey    = "component"
38
+	providerKey     = "provider"
39
+	openshiftValue  = "openshift"
40
+
41
+	fluentdServiceAccountName = "aggregated-logging-fluentd"
42
+)
43
+
44
+var loggingSelector = labels.Set{loggingInfraKey: "support"}
45
+
46
+//NewAggregatedLogging returns the AggregatedLogging Diagnostic
47
+func NewAggregatedLogging(masterConfigFile string, kclient *kclient.Client, osclient *client.Client) *AggregatedLogging {
48
+	return &AggregatedLogging{nil, masterConfigFile, osclient, kclient, types.NewDiagnosticResult(AggregatedLoggingName)}
49
+}
50
+
51
+func (d *AggregatedLogging) getScc(name string) (*kapi.SecurityContextConstraints, error) {
52
+	return d.KubeClient.SecurityContextConstraints().Get(name)
53
+}
54
+
55
+func (d *AggregatedLogging) getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) {
56
+	return d.OsClient.ClusterRoleBindings().Get(name)
57
+}
58
+
59
+func (d *AggregatedLogging) routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) {
60
+	return d.OsClient.Routes(project).List(options)
61
+}
62
+
63
+func (d *AggregatedLogging) serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) {
64
+	return d.KubeClient.ServiceAccounts(project).List(options)
65
+}
66
+
67
+func (d *AggregatedLogging) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
68
+	return d.KubeClient.Services(project).List(options)
69
+}
70
+
71
+func (d *AggregatedLogging) endpointsForService(project string, service string) (*kapi.Endpoints, error) {
72
+	return d.KubeClient.Endpoints(project).Get(service)
73
+}
74
+
75
+func (d *AggregatedLogging) daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) {
76
+	return d.KubeClient.DaemonSets(project).List(options)
77
+}
78
+
79
+func (d *AggregatedLogging) nodes(options kapi.ListOptions) (*kapi.NodeList, error) {
80
+	return d.KubeClient.Nodes().List(options)
81
+}
82
+
83
+func (d *AggregatedLogging) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
84
+	return d.KubeClient.Pods(project).List(options)
85
+}
86
+func (d *AggregatedLogging) deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) {
87
+	return d.OsClient.DeploymentConfigs(project).List(options)
88
+}
89
+
90
+func (d *AggregatedLogging) Info(id string, message string) {
91
+	d.result.Info(id, message)
92
+}
93
+
94
+func (d *AggregatedLogging) Error(id string, err error, message string) {
95
+	d.result.Error(id, err, message)
96
+}
97
+
98
+func (d *AggregatedLogging) Debug(id string, message string) {
99
+	d.result.Debug(id, message)
100
+}
101
+
102
+func (d *AggregatedLogging) Warn(id string, err error, message string) {
103
+	d.result.Warn(id, err, message)
104
+}
105
+
106
+func (d *AggregatedLogging) Name() string {
107
+	return AggregatedLoggingName
108
+}
109
+
110
+func (d *AggregatedLogging) Description() string {
111
+	return "Check aggregated logging integration for proper configuration"
112
+}
113
+
114
+func (d *AggregatedLogging) CanRun() (bool, error) {
115
+	if len(d.MasterConfigFile) == 0 {
116
+		return false, errors.New("No master config file was provided")
117
+	}
118
+	if d.OsClient == nil {
119
+		return false, errors.New("Config must include a cluster-admin context to run this diagnostic")
120
+	}
121
+	if d.KubeClient == nil {
122
+		return false, errors.New("Config must include a cluster-admin context to run this diagnostic")
123
+	}
124
+	var err error
125
+	d.masterConfig, err = hostdiag.GetMasterConfig(d.result, d.MasterConfigFile)
126
+	if err != nil {
127
+		return false, errors.New("Unreadable master config; skipping this diagnostic.")
128
+	}
129
+	return true, nil
130
+}
131
+
132
+func (d *AggregatedLogging) Check() types.DiagnosticResult {
133
+	project := retrieveLoggingProject(d.result, d.masterConfig, d.OsClient)
134
+	if len(project) != 0 {
135
+		checkServiceAccounts(d, d, project)
136
+		checkClusterRoleBindings(d, d, project)
137
+		checkSccs(d, d, project)
138
+		checkDeploymentConfigs(d, d, project)
139
+		checkDaemonSets(d, d, project)
140
+		checkServices(d, d, project)
141
+		checkRoutes(d, d, project)
142
+		checkKibana(d.result, d.OsClient, d.KubeClient, project)
143
+	}
144
+	return d.result
145
+}
146
+
147
+const projectNodeSelectorWarning = `
148
+The project '%[1]s' was found with a non-empty node selector annotation.  This will keep
149
+Fluentd from running on certain nodes and collecting logs from the entire cluster.  You
150
+can correct it by editing the project:
151
+
152
+  oc edit namespace %[1]s
153
+
154
+and updating the annotation:
155
+
156
+  'openshift.io/node-selector' : ""
157
+
158
+`
159
+
160
+func retrieveLoggingProject(r types.DiagnosticResult, masterCfg *configapi.MasterConfig, osClient *client.Client) string {
161
+	r.Debug("AGL0010", fmt.Sprintf("masterConfig.AssetConfig.LoggingPublicURL: '%s'", masterCfg.AssetConfig.LoggingPublicURL))
162
+	projectName := ""
163
+	if len(masterCfg.AssetConfig.LoggingPublicURL) == 0 {
164
+		r.Debug("AGL0017", "masterConfig.AssetConfig.LoggingPublicURL is empty")
165
+		return projectName
166
+	}
167
+
168
+	loggingUrl, err := url.Parse(masterCfg.AssetConfig.LoggingPublicURL)
169
+	if err != nil {
170
+		r.Error("AGL0011", err, fmt.Sprintf("Unable to parse the loggingPublicURL from the masterConfig '%s'", masterCfg.AssetConfig.LoggingPublicURL))
171
+		return projectName
172
+	}
173
+
174
+	routeList, err := osClient.Routes(kapi.NamespaceAll).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
175
+	if err != nil {
176
+		r.Error("AGL0012", err, fmt.Sprintf("There was an error while trying to find the route associated with '%s' which is probably transient: %s", loggingUrl, err))
177
+		return projectName
178
+	}
179
+
180
+	for _, route := range routeList.Items {
181
+		r.Debug("AGL0013", fmt.Sprintf("Comparing URL to route.Spec.Host: %s", route.Spec.Host))
182
+		if loggingUrl.Host == route.Spec.Host {
183
+			if len(projectName) == 0 {
184
+				projectName = route.ObjectMeta.Namespace
185
+				r.Info("AGL0015", fmt.Sprintf("Found route '%s' matching logging URL '%s' in project: '%s'", route.ObjectMeta.Name, loggingUrl.Host, projectName))
186
+			} else {
187
+				r.Warn("AGL0019", nil, fmt.Sprintf("Found additional route '%s' matching logging URL '%s' in project: '%s'.  This could mean you have multiple logging deployments.", route.ObjectMeta.Name, loggingUrl.Host, route.ObjectMeta.Namespace))
188
+			}
189
+		}
190
+	}
191
+	if len(projectName) == 0 {
192
+		message := fmt.Sprintf("Unable to find a route matching the loggingPublicURL defined in the master config '%s'. Check that the URL is correct and aggregated logging is deployed.", loggingUrl)
193
+		r.Error("AGL0014", errors.New(message), message)
194
+		return ""
195
+	}
196
+	project, err := osClient.Projects().Get(projectName)
197
+	if err != nil {
198
+		r.Error("AGL0018", err, fmt.Sprintf("There was an error retrieving project '%s' which is most likely a transient error: %s", projectName, err))
199
+		return ""
200
+	}
201
+	nodeSelector, ok := project.ObjectMeta.Annotations["openshift.io/node-selector"]
202
+	if ok && len(nodeSelector) != 0 {
203
+		r.Warn("AGL0030", nil, fmt.Sprintf(projectNodeSelectorWarning, projectName))
204
+	}
205
+	return projectName
206
+}
0 207
new file mode 100644
... ...
@@ -0,0 +1,78 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"github.com/openshift/origin/pkg/diagnostics/log"
4
+	"testing"
5
+)
6
+
7
+const fakeProject = "someProject"
8
+
9
+type fakeLogMessage struct {
10
+	id       string
11
+	logLevel log.Level
12
+	message  string
13
+}
14
+
15
+type fakeDiagnostic struct {
16
+	err      error
17
+	messages map[string]fakeLogMessage
18
+	test     *testing.T
19
+}
20
+
21
+func newFakeDiagnostic(t *testing.T) *fakeDiagnostic {
22
+	return &fakeDiagnostic{
23
+		messages: map[string]fakeLogMessage{},
24
+		test:     t,
25
+	}
26
+}
27
+
28
+func (f *fakeDiagnostic) dumpMessages() {
29
+	f.test.Log("<<<<<<<< Dumping test messages >>>>>>>>")
30
+	for id, message := range f.messages {
31
+		f.test.Logf("id: %s, logLevel: %s, message: %s", id, message.logLevel.Name, message.message)
32
+	}
33
+}
34
+
35
+func (f *fakeDiagnostic) Info(id string, message string) {
36
+	f.messages[id] = fakeLogMessage{id, log.InfoLevel, message}
37
+}
38
+
39
+func (f *fakeDiagnostic) Error(id string, err error, message string) {
40
+	f.messages[id] = fakeLogMessage{id, log.ErrorLevel, message}
41
+}
42
+
43
+func (f *fakeDiagnostic) Debug(id string, message string) {
44
+	f.messages[id] = fakeLogMessage{id, log.DebugLevel, message}
45
+}
46
+
47
+func (f *fakeDiagnostic) Warn(id string, err error, message string) {
48
+	f.messages[id] = fakeLogMessage{id, log.WarnLevel, message}
49
+}
50
+
51
+func (d *fakeDiagnostic) assertMessage(id string, missing string, level log.Level) {
52
+	message, ok := d.messages[id]
53
+	if !ok {
54
+		d.test.Errorf("Unable to find message with id %s. %s", id, missing)
55
+		return
56
+	}
57
+	if message.logLevel != level {
58
+		d.test.Errorf("Exp logLevel %s for %s but got %s", level.Name, id, message.logLevel.Name)
59
+	}
60
+}
61
+
62
+func (d *fakeDiagnostic) assertNoWarnings() {
63
+	for _, message := range d.messages {
64
+
65
+		if message.logLevel == log.WarnLevel {
66
+			d.test.Errorf("Exp no WarnLevel log messages.")
67
+		}
68
+	}
69
+}
70
+func (d *fakeDiagnostic) assertNoErrors() {
71
+	for _, message := range d.messages {
72
+
73
+		if message.logLevel == log.ErrorLevel {
74
+			d.test.Errorf("Exp no ErrorLevel log messages.")
75
+		}
76
+	}
77
+}
0 78
new file mode 100644
... ...
@@ -0,0 +1,60 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	kapi "k8s.io/kubernetes/pkg/api"
4
+	kapisext "k8s.io/kubernetes/pkg/apis/extensions"
5
+
6
+	authapi "github.com/openshift/origin/pkg/authorization/api"
7
+	deployapi "github.com/openshift/origin/pkg/deploy/api"
8
+	routesapi "github.com/openshift/origin/pkg/route/api"
9
+)
10
+
11
+//diagnosticReporter provides diagnostic messages
12
+type diagnosticReporter interface {
13
+	Info(id string, message string)
14
+	Debug(id string, message string)
15
+	Error(id string, err error, message string)
16
+	Warn(id string, err error, message string)
17
+}
18
+
19
+type routesAdapter interface {
20
+	routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error)
21
+}
22
+
23
+type sccAdapter interface {
24
+	getScc(name string) (*kapi.SecurityContextConstraints, error)
25
+}
26
+
27
+type clusterRoleBindingsAdapter interface {
28
+	getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error)
29
+}
30
+
31
+//deploymentConfigAdapter is an abstraction to retrieve resources for validating dcs
32
+//for aggregated logging diagnostics
33
+type deploymentConfigAdapter interface {
34
+	deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error)
35
+	podsAdapter
36
+}
37
+
38
+//daemonsetAdapter is an abstraction to retrieve resources for validating daemonsets
39
+//for aggregated logging diagnostics
40
+type daemonsetAdapter interface {
41
+	daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error)
42
+	nodes(options kapi.ListOptions) (*kapi.NodeList, error)
43
+	podsAdapter
44
+}
45
+
46
+type podsAdapter interface {
47
+	pods(project string, options kapi.ListOptions) (*kapi.PodList, error)
48
+}
49
+
50
+//saAdapter abstracts retrieving service accounts
51
+type saAdapter interface {
52
+	serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error)
53
+}
54
+
55
+//servicesAdapter abstracts retrieving services
56
+type servicesAdapter interface {
57
+	services(project string, options kapi.ListOptions) (*kapi.ServiceList, error)
58
+	endpointsForService(project string, serviceName string) (*kapi.Endpoints, error)
59
+}
0 60
new file mode 100644
... ...
@@ -0,0 +1,98 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"fmt"
5
+	"net/url"
6
+	"strings"
7
+
8
+	kapi "k8s.io/kubernetes/pkg/api"
9
+	kclient "k8s.io/kubernetes/pkg/client/unversioned"
10
+	"k8s.io/kubernetes/pkg/util/sets"
11
+
12
+	"github.com/openshift/origin/pkg/client"
13
+	"github.com/openshift/origin/pkg/diagnostics/types"
14
+	oauthapi "github.com/openshift/origin/pkg/oauth/api"
15
+)
16
+
17
+const (
18
+	kibanaProxyOauthClientName = "kibana-proxy"
19
+	kibanaProxySecretName      = "logging-kibana-proxy"
20
+	oauthSecretKeyName         = "oauth-secret"
21
+)
22
+
23
+//checkKibana verifies the various integration points between Kibana and logging
24
+func checkKibana(r types.DiagnosticResult, osClient *client.Client, kClient *kclient.Client, project string) {
25
+	oauthclient, err := osClient.OAuthClients().Get(kibanaProxyOauthClientName)
26
+	if err != nil {
27
+		r.Error("AGL0115", err, fmt.Sprintf("Error retrieving the OauthClient '%s'. Unable to check Kibana", kibanaProxyOauthClientName))
28
+		return
29
+	}
30
+	checkKibanaSecret(r, osClient, kClient, project, oauthclient)
31
+	checkKibanaRoutesInOauthClient(r, osClient, project, oauthclient)
32
+}
33
+
34
+//checkKibanaSecret confirms the secret used by kibana matches that configured in the oauth client
35
+func checkKibanaSecret(r types.DiagnosticResult, osClient *client.Client, kClient *kclient.Client, project string, oauthclient *oauthapi.OAuthClient) {
36
+	r.Debug("AGL0100", "Checking oauthclient secrets...")
37
+	secret, err := kClient.Secrets(project).Get(kibanaProxySecretName)
38
+	if err != nil {
39
+		r.Error("AGL0105", err, fmt.Sprintf("Error retrieving the secret '%s'", kibanaProxySecretName))
40
+		return
41
+	}
42
+	decoded, err := decodeSecret(secret, oauthSecretKeyName)
43
+	if err != nil {
44
+		r.Error("AGL0110", err, "Unable to decode Kibana Secret")
45
+		return
46
+	}
47
+	if decoded != oauthclient.Secret {
48
+		r.Debug("AGL0120", fmt.Sprintf("OauthClient Secret:    '%s'", oauthclient.Secret))
49
+		r.Debug("AGL0125", fmt.Sprintf("Decoded Kibana Secret: '%s'", decoded))
50
+		message := fmt.Sprintf("The %s OauthClient.Secret does not match the decoded oauth secret in '%s'", kibanaProxyOauthClientName, kibanaProxySecretName)
51
+		r.Error("AGL0130", errors.New(message), message)
52
+	}
53
+}
54
+
55
+//checkKibanaRoutesInOauthClient verifies the client contains the correct redirect uris
56
+func checkKibanaRoutesInOauthClient(r types.DiagnosticResult, osClient *client.Client, project string, oauthclient *oauthapi.OAuthClient) {
57
+	r.Debug("AGL0141", "Checking oauthclient redirectURIs for the logging routes...")
58
+	routeList, err := osClient.Routes(project).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
59
+	if err != nil {
60
+		r.Error("AGL0143", err, "Error retrieving the logging routes.")
61
+		return
62
+	}
63
+	redirectUris, err := parseRedirectUris(oauthclient.RedirectURIs)
64
+	if err != nil {
65
+		r.Error("AGL0145", err, "Error parsing the OAuthClient.RedirectURIs")
66
+		return
67
+	}
68
+	for _, route := range routeList.Items {
69
+		if !redirectUris.Has(route.Spec.Host) {
70
+			message := fmt.Sprintf("OauthClient '%s' does not include a redirectURI for route '%s' which is '%s'", oauthclient.ObjectMeta.Name, route.ObjectMeta.Name, route.Spec.Host)
71
+			r.Error("AGL0147", errors.New(message), message)
72
+		}
73
+	}
74
+
75
+	return
76
+}
77
+
78
+func parseRedirectUris(uris []string) (sets.String, error) {
79
+	urls := sets.String{}
80
+	for _, uri := range uris {
81
+		url, err := url.Parse(uri)
82
+		if err != nil {
83
+			return urls, err
84
+		}
85
+		urls.Insert(url.Host)
86
+	}
87
+	return urls, nil
88
+}
89
+
90
+// decodeSecret returns the value of a secret data entry as a trimmed string (the client has already base64-decoded Secret data)
91
+func decodeSecret(secret *kapi.Secret, key string) (string, error) {
92
+	value, ok := secret.Data[key]
93
+	if !ok {
94
+		return "", fmt.Errorf("The %s secret did not have a data entry for %s", secret.ObjectMeta.Name, key)
95
+	}
96
+	return strings.TrimSpace(string(value)), nil
97
+}
0 98
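A hypothetical test sketch (not part of this change) illustrating decodeSecret: Secret data arrives from the client as raw bytes, so the helper only trims whitespace and reports a missing key as an error.

  package aggregated_logging

  import (
      "testing"

      kapi "k8s.io/kubernetes/pkg/api"
  )

  func TestDecodeSecretSketch(t *testing.T) {
      secret := &kapi.Secret{
          ObjectMeta: kapi.ObjectMeta{Name: kibanaProxySecretName},
          Data:       map[string][]byte{oauthSecretKeyName: []byte("s3cr3t\n")},
      }
      if value, err := decodeSecret(secret, oauthSecretKeyName); err != nil || value != "s3cr3t" {
          t.Errorf("expected the trimmed value 's3cr3t', got %q (err: %v)", value, err)
      }
      if _, err := decodeSecret(secret, "missing-key"); err == nil {
          t.Error("expected an error for a data entry that does not exist")
      }
  }
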
new file mode 100644
... ...
@@ -0,0 +1,95 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"crypto/tls"
4
+	"crypto/x509"
5
+	"encoding/pem"
6
+	"errors"
7
+	"fmt"
8
+
9
+	kapi "k8s.io/kubernetes/pkg/api"
10
+
11
+	routes "github.com/openshift/origin/pkg/route/api"
12
+)
13
+
14
+const routeUnaccepted = `
15
+An unaccepted route is most likely due to one of the following reasons:
16
+
17
+* No router has been deployed to serve the route.
18
+* Another route with the same host already exists.
19
+
20
+If a router has been deployed, look for duplicate matching routes by
21
+running the following:
22
+
23
+  oc get --all-namespaces routes --template='{{range .items}}{{if eq .spec.host "%[2]s"}}{{println .metadata.name "in" .metadata.namespace}}{{end}}{{end}}'
24
+
25
+`
26
+const routeCertMissingHostName = `
27
+Try updating the route certificate to include its host as either the CommonName (CN) or one of the alternate names.
28
+`
29
+
30
+//checkRoutes looks through the logging infra routes to see if they have been accepted and, when TLS is configured, that the certificate and key are valid
31
+func checkRoutes(r diagnosticReporter, adapter routesAdapter, project string) {
32
+	r.Debug("AGL0300", "Checking routes...")
33
+	routeList, err := adapter.routes(project, kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
34
+	if err != nil {
35
+		r.Error("AGL0305", err, fmt.Sprintf("There was an error retrieving routes in the project '%s' with selector '%s'", project, loggingSelector.AsSelector()))
36
+		return
37
+	}
38
+	if len(routeList.Items) == 0 {
39
+		r.Error("AGL0310", nil, fmt.Sprintf("There were no routes found to support logging in project '%s'", project))
40
+		return
41
+	}
42
+	for _, route := range routeList.Items {
43
+		if !wasAccepted(r, route) {
44
+			r.Error("AGL0325", nil, fmt.Sprintf("Route '%s' has not been accepted by any routers."+routeUnaccepted, route.ObjectMeta.Name, route.Spec.Host))
45
+		}
46
+		if route.Spec.TLS != nil && len(route.Spec.TLS.Certificate) != 0 && len(route.Spec.TLS.Key) != 0 {
47
+			checkRouteCertificate(r, route)
48
+		} else {
49
+			r.Debug("AGL0331", fmt.Sprintf("Skipping key and certificate checks on route '%s'.  TLS is not configured or the certificate or key is missing.", route.ObjectMeta.Name))
50
+		}
51
+	}
52
+}
53
+
54
+func checkRouteCertificate(r diagnosticReporter, route routes.Route) {
55
+	r.Debug("AGL0330", fmt.Sprintf("Checking certificate for route '%s'...", route.ObjectMeta.Name))
56
+	block, _ := pem.Decode([]byte(route.Spec.TLS.Certificate))
57
+	//verify hostname
58
+	if block != nil {
59
+		cert, err := x509.ParseCertificate(block.Bytes)
60
+		if err != nil {
61
+			r.Error("AGL0335", err, fmt.Sprintf("Unable to parse the certificate for route '%s'", route.ObjectMeta.Name))
62
+			return
63
+		}
64
+		r.Debug("AGL0340", fmt.Sprintf("Cert CommonName: '%s' Cert DNSNames: '%s'", cert.Subject.CommonName, cert.DNSNames))
65
+		if err := cert.VerifyHostname(route.Spec.Host); err != nil {
66
+			r.Error("AGL0345", err, fmt.Sprintf("Route '%[1]s' certificate does not include route host '%[2]s'"+routeCertMissingHostName, route.ObjectMeta.Name, route.Spec.Host))
67
+		}
68
+	} else {
69
+		r.Error("AGL0350", errors.New("Unable to decode the TLS Certificate"), "Unable to decode the TLS Certificate")
70
+	}
71
+
72
+	//verify key matches cert
73
+	r.Debug("AGL0355", fmt.Sprintf("Checking certificate matches key for route '%s'", route.ObjectMeta.Name))
74
+	_, err := tls.X509KeyPair([]byte(route.Spec.TLS.Certificate), []byte(route.Spec.TLS.Key))
75
+	if err != nil {
76
+		r.Error("AGL0365", err, fmt.Sprintf("Route '%s' key and certificate do not match: %s.  The router will be unable to pass traffic using this route.", route.ObjectMeta.Name, err))
77
+	}
78
+}
79
+
80
+func wasAccepted(r diagnosticReporter, route routes.Route) bool {
81
+	r.Debug("AGL0310", fmt.Sprintf("Checking if route '%s' was accepted...", route.ObjectMeta.Name))
82
+	accepted := 0
83
+	for _, status := range route.Status.Ingress {
84
+		r.Debug("AGL0315", fmt.Sprintf("Status for router: '%s', host: '%s'", status.RouterName, status.Host))
85
+		for _, condition := range status.Conditions {
86
+			r.Debug("AGL0320", fmt.Sprintf("Condition type: '%s' status: '%s'", condition.Type, condition.Status))
87
+			if condition.Type == routes.RouteAdmitted && condition.Status == kapi.ConditionTrue {
88
+				accepted = accepted + 1
89
+			}
90
+		}
91
+	}
92
+	//Add check to compare acceptance to the number of available routers?
93
+	return accepted > 0
94
+}
0 95
new file mode 100644
... ...
@@ -0,0 +1,115 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+
8
+	"github.com/openshift/origin/pkg/diagnostics/log"
9
+	routesapi "github.com/openshift/origin/pkg/route/api"
10
+)
11
+
12
+const (
13
+	testRoutesKey = "routes"
14
+)
15
+
16
+type fakeRoutesDiagnostic struct {
17
+	fakeDiagnostic
18
+	fakeRoutes   routesapi.RouteList
19
+	clienterrors map[string]error
20
+}
21
+
22
+func newFakeRoutesDiagnostic(t *testing.T) *fakeRoutesDiagnostic {
23
+	return &fakeRoutesDiagnostic{
24
+		fakeDiagnostic: *newFakeDiagnostic(t),
25
+		clienterrors:   map[string]error{},
26
+	}
27
+}
28
+
29
+func (f *fakeRoutesDiagnostic) addRouteWith(condType routesapi.RouteIngressConditionType, condStatus kapi.ConditionStatus, cert string, key string) {
30
+	ingress := routesapi.RouteIngress{
31
+		Conditions: []routesapi.RouteIngressCondition{
32
+			{
33
+				Type:   condType,
34
+				Status: condStatus,
35
+			},
36
+		},
37
+	}
38
+	route := routesapi.Route{
39
+		ObjectMeta: kapi.ObjectMeta{Name: "aname"},
40
+		Status: routesapi.RouteStatus{
41
+			Ingress: []routesapi.RouteIngress{ingress},
42
+		},
43
+	}
44
+	if len(cert) != 0 && len(key) != 0 {
45
+		tls := routesapi.TLSConfig{
46
+			Certificate: cert,
47
+			Key:         key,
48
+		}
49
+		route.Spec.TLS = &tls
50
+	}
51
+	f.fakeRoutes.Items = append(f.fakeRoutes.Items, route)
52
+}
53
+
54
+func (f *fakeRoutesDiagnostic) routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) {
55
+	value, ok := f.clienterrors[testRoutesKey]
56
+	if ok {
57
+		return nil, value
58
+	}
59
+	return &f.fakeRoutes, nil
60
+}
61
+
62
+func TestRouteWhenErrorFromClient(t *testing.T) {
63
+	d := newFakeRoutesDiagnostic(t)
64
+	d.clienterrors[testRoutesKey] = errors.New("some client error")
65
+
66
+	checkRoutes(d, d, fakeProject)
67
+	d.assertMessage("AGL0305", "Exp an error when there is a client error retrieving routes", log.ErrorLevel)
68
+	d.dumpMessages()
69
+}
70
+
71
+func TestRouteWhenZeroRoutesAvailable(t *testing.T) {
72
+	d := newFakeRoutesDiagnostic(t)
73
+
74
+	checkRoutes(d, d, fakeProject)
75
+	d.assertMessage("AGL0310", "Exp an error when there are no routes to support logging", log.ErrorLevel)
76
+	d.dumpMessages()
77
+}
78
+
79
+// test that an error is reported when the route is not accepted
80
+func TestRouteWhenRouteNotAccepted(t *testing.T) {
81
+	d := newFakeRoutesDiagnostic(t)
82
+	d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "", "")
83
+
84
+	checkRoutes(d, d, fakeProject)
85
+	d.assertMessage("AGL0325", "Exp an error when a route was not accepted", log.ErrorLevel)
86
+	d.assertMessage("AGL0331", "Exp to skip the cert check since none specified", log.DebugLevel)
87
+	d.dumpMessages()
88
+}
89
+func TestRouteWhenRouteAccepted(t *testing.T) {
90
+	d := newFakeRoutesDiagnostic(t)
91
+	d.addRouteWith(routesapi.RouteAdmitted, kapi.ConditionTrue, "", "")
92
+
93
+	checkRoutes(d, d, fakeProject)
94
+	d.assertNoErrors()
95
+	d.dumpMessages()
96
+}
97
+
98
+func TestRouteWhenErrorDecodingCert(t *testing.T) {
99
+	d := newFakeRoutesDiagnostic(t)
100
+	d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "cert", "key")
101
+
102
+	checkRoutes(d, d, fakeProject)
103
+	d.assertMessage("AGL0350", "Exp an error when unable to decode cert", log.ErrorLevel)
104
+	d.dumpMessages()
105
+}
106
+
107
+func TestRouteWhenErrorParsingCert(t *testing.T) {
108
+	d := newFakeRoutesDiagnostic(t)
109
+	d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "-----BEGIN CERTIFICATE-----\naGVsbG8gd29ybGQK\n-----END CERTIFICATE-----\n", "key")
110
+
111
+	checkRoutes(d, d, fakeProject)
112
+	d.assertMessage("AGL0335", "Exp an error when unable to parse the cert", log.ErrorLevel)
113
+	d.dumpMessages()
114
+}
0 115
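
The parse-error test above relies on input that pem.Decode accepts but x509.ParseCertificate rejects; any well-formed PEM block whose body is not DER-encoded certificate data behaves this way. If the tests prefer a named fixture over an inline literal, a sketch (the base64 payload is arbitrary filler):

    // notACertPEM decodes cleanly as a PEM block but is not a DER-encoded
    // certificate, so checkRouteCertificate reports AGL0335 (parse failure)
    // rather than AGL0350 (decode failure).
    const notACertPEM = `-----BEGIN CERTIFICATE-----
    aGVsbG8gd29ybGQK
    -----END CERTIFICATE-----
    `

    // Possible usage in TestRouteWhenErrorParsingCert:
    //   d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, notACertPEM, "key")
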
new file mode 100644
... ...
@@ -0,0 +1,37 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+
5
+	"k8s.io/kubernetes/pkg/util/sets"
6
+)
7
+
8
+const sccPrivilegedName = "privileged"
9
+
10
+var sccPrivilegedNames = sets.NewString(fluentdServiceAccountName)
11
+
12
+const sccPrivilegedUnboundServiceAccount = `
13
+The ServiceAccount '%[1]s' in project '%[2]s' is not bound to the 'privileged' SecurityContextConstraint.  As a
14
+user with a cluster-admin role, you can grant the permissions by running
15
+the following:
16
+
17
+  oadm policy add-scc-to-user privileged system:serviceaccount:%[2]s:%[1]s
18
+`
19
+
20
+func checkSccs(r diagnosticReporter, adapter sccAdapter, project string) {
21
+	r.Debug("AGL0700", "Checking SecurityContextConstraints...")
22
+	scc, err := adapter.getScc(sccPrivilegedName)
23
+	if err != nil {
24
+		r.Error("AGL0705", err, fmt.Sprintf("There was an error while trying to retrieve the SecurityContextConstraints for the logging stack: %s", err))
25
+		return
26
+	}
27
+	privilegedUsers := sets.NewString()
28
+	for _, user := range scc.Users {
29
+		privilegedUsers.Insert(user)
30
+	}
31
+	for _, name := range sccPrivilegedNames.List() {
32
+		if !privilegedUsers.Has(fmt.Sprintf("system:serviceaccount:%s:%s", project, name)) {
33
+			r.Error("AGL0710", nil, fmt.Sprintf(sccPrivilegedUnboundServiceAccount, name, project))
34
+		}
35
+	}
36
+}
0 37
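
The SCC check above boils down to set membership on subject strings of the form system:serviceaccount:<project>:<name>. A standalone sketch of that comparison using plain maps rather than the Kubernetes sets helper; the project and account names in main are illustrative placeholders only:

    package main

    import "fmt"

    // missingPrivilegedSubjects restates the comparison done by checkSccs:
    // build the expected system:serviceaccount:<project>:<name> subjects and
    // report the ones absent from the SCC's Users list.
    func missingPrivilegedSubjects(sccUsers []string, project string, names []string) []string {
    	present := map[string]bool{}
    	for _, user := range sccUsers {
    		present[user] = true
    	}
    	var missing []string
    	for _, name := range names {
    		subject := fmt.Sprintf("system:serviceaccount:%s:%s", project, name)
    		if !present[subject] {
    			missing = append(missing, subject)
    		}
    	}
    	return missing
    }

    func main() {
    	// Placeholder project and service-account names for illustration.
    	users := []string{"system:serviceaccount:logging:aggregated-logging-fluentd"}
    	fmt.Println(missingPrivilegedSubjects(users, "logging", []string{"aggregated-logging-fluentd", "logging-deployer"}))
    }
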
new file mode 100644
... ...
@@ -0,0 +1,65 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"encoding/json"
4
+	"errors"
5
+	"fmt"
6
+	"testing"
7
+
8
+	kapi "k8s.io/kubernetes/pkg/api"
9
+
10
+	"github.com/openshift/origin/pkg/diagnostics/log"
11
+)
12
+
13
+type fakeSccDiagnostic struct {
14
+	fakeDiagnostic
15
+	fakeScc kapi.SecurityContextConstraints
16
+}
17
+
18
+func newFakeSccDiagnostic(t *testing.T) *fakeSccDiagnostic {
19
+	return &fakeSccDiagnostic{
20
+		fakeDiagnostic: *newFakeDiagnostic(t),
21
+	}
22
+}
23
+
24
+func (f *fakeSccDiagnostic) getScc(name string) (*kapi.SecurityContextConstraints, error) {
25
+	data, _ := json.Marshal(f.fakeScc)
26
+	f.test.Logf(">> test#getScc(%s), err: %s, scc: %s", name, f.err, string(data))
27
+	if f.err != nil {
28
+		return nil, f.err
29
+	}
30
+	return &f.fakeScc, nil
31
+}
32
+
33
+func (f *fakeSccDiagnostic) addSccFor(name string, project string) {
34
+	f.fakeScc.Users = append(f.fakeScc.Users, fmt.Sprintf("system:serviceaccount:%s:%s", project, name))
35
+}
36
+
37
+func TestCheckSccWhenClientReturnsError(t *testing.T) {
38
+	d := newFakeSccDiagnostic(t)
39
+	d.err = errors.New("client error")
40
+
41
+	checkSccs(d, d, fakeProject)
42
+
43
+	d.assertMessage("AGL0705", "Exp error when client returns error getting SCC", log.ErrorLevel)
44
+	d.dumpMessages()
45
+}
46
+
47
+func TestCheckSccWhenMissingPrivilegedUsers(t *testing.T) {
48
+	d := newFakeSccDiagnostic(t)
49
+
50
+	checkSccs(d, d, fakeProject)
51
+
52
+	d.assertMessage("AGL0710", "Exp an error when the SCC is missing an expected service account", log.ErrorLevel)
53
+	d.dumpMessages()
54
+}
55
+
56
+func TestCheckSccWhenEverythingExists(t *testing.T) {
57
+	d := newFakeSccDiagnostic(t)
58
+	d.addSccFor(fluentdServiceAccountName, fakeProject)
59
+
60
+	checkSccs(d, d, fakeProject)
61
+
62
+	d.assertNoErrors()
63
+	d.dumpMessages()
64
+}
0 65
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/util/sets"
8
+)
9
+
10
+var serviceAccountNames = sets.NewString("logging-deployer", "aggregated-logging-kibana", "aggregated-logging-curator", "aggregated-logging-elasticsearch", fluentdServiceAccountName)
11
+
12
+const serviceAccountsMissing = `
13
+Did not find ServiceAccounts: %s.  The logging infrastructure will not function 
14
+properly without them.  You may need to re-run the installer.
15
+`
16
+
17
+func checkServiceAccounts(d diagnosticReporter, f saAdapter, project string) {
18
+	d.Debug("AGL0500", fmt.Sprintf("Checking ServiceAccounts in project '%s'...", project))
19
+	saList, err := f.serviceAccounts(project, kapi.ListOptions{})
20
+	if err != nil {
21
+		d.Error("AGL0505", err, fmt.Sprintf("There was an error while trying to retrieve the ServiceAccounts for the AggregatedLogging stack: %s", err))
22
+		return
23
+	}
24
+	foundNames := sets.NewString()
25
+	for _, sa := range saList.Items {
26
+		foundNames.Insert(sa.ObjectMeta.Name)
27
+	}
28
+	missing := sets.NewString()
29
+	for _, name := range serviceAccountNames.List() {
30
+		if !foundNames.Has(name) {
31
+			missing.Insert(name)
32
+		}
33
+	}
34
+	if missing.Len() != 0 {
35
+		d.Error("AGL0515", nil, fmt.Sprintf(serviceAccountsMissing, strings.Join(missing.List(), ",")))
36
+	}
37
+}
0 38
new file mode 100644
... ...
@@ -0,0 +1,64 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+
8
+	"github.com/openshift/origin/pkg/diagnostics/log"
9
+)
10
+
11
+type mockServiceAccountDiagnostic struct {
12
+	accounts kapi.ServiceAccountList
13
+	fakeDiagnostic
14
+}
15
+
16
+func newMockServiceAccountDiagnostic(t *testing.T) *mockServiceAccountDiagnostic {
17
+	return &mockServiceAccountDiagnostic{
18
+		accounts:       kapi.ServiceAccountList{},
19
+		fakeDiagnostic: *newFakeDiagnostic(t),
20
+	}
21
+}
22
+
23
+func (m *mockServiceAccountDiagnostic) serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) {
24
+	if m.err != nil {
25
+		return &m.accounts, m.err
26
+	}
27
+	return &m.accounts, nil
28
+}
29
+
30
+func (d *mockServiceAccountDiagnostic) addServiceAccountNamed(name string) {
31
+	meta := kapi.ObjectMeta{Name: name}
32
+	d.accounts.Items = append(d.accounts.Items, kapi.ServiceAccount{ObjectMeta: meta})
33
+}
34
+
35
+func TestCheckingServiceAccountsWhenFailedResponseFromClient(t *testing.T) {
36
+	d := newMockServiceAccountDiagnostic(t)
37
+	d.err = errors.New("Some Error")
38
+	checkServiceAccounts(d, d, fakeProject)
39
+	d.assertMessage("AGL0505",
40
+		"Exp an error when unable to retrieve service accounts because of a client error",
41
+		log.ErrorLevel)
42
+}
43
+
44
+func TestCheckingServiceAccountsWhenMissingExpectedServiceAccount(t *testing.T) {
45
+	d := newMockServiceAccountDiagnostic(t)
46
+	d.addServiceAccountNamed("foobar")
47
+
48
+	checkServiceAccounts(d, d, fakeProject)
49
+	d.assertMessage("AGL0515",
50
+		"Exp an error when an expected service account was not found.",
51
+		log.ErrorLevel)
52
+}
53
+
54
+func TestCheckingServiceAccountsIsOk(t *testing.T) {
55
+	d := newMockServiceAccountDiagnostic(t)
56
+
57
+	for _, name := range serviceAccountNames.List() {
58
+		d.addServiceAccountNamed(name)
59
+	}
60
+
61
+	checkServiceAccounts(d, d, fakeProject)
62
+	d.assertNoErrors()
63
+}
0 64
new file mode 100644
... ...
@@ -0,0 +1,61 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+
6
+	kapi "k8s.io/kubernetes/pkg/api"
7
+	"k8s.io/kubernetes/pkg/util/sets"
8
+)
9
+
10
+var loggingServices = sets.NewString("logging-es", "logging-es-cluster", "logging-es-ops", "logging-es-ops-cluster", "logging-kibana", "logging-kibana-ops")
11
+
12
+const serviceNotFound = `
13
+Expected to find '%s' among the logging services for the project but did not.  
14
+`
15
+const serviceOpsNotFound = `
16
+Expected to find '%s' among the logging services for the project but did not. This
17
+may not matter if you chose not to install a separate logging stack to support operations.
18
+`
19
+
20
+// checkServices looks to see if the aggregated logging services exist
21
+func checkServices(r diagnosticReporter, adapter servicesAdapter, project string) {
22
+	r.Debug("AGL0200", fmt.Sprintf("Checking for services in project '%s' with selector '%s'", project, loggingSelector.AsSelector()))
23
+	serviceList, err := adapter.services(project, kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
24
+	if err != nil {
25
+		r.Error("AGL0205", err, fmt.Sprintf("There was an error while trying to retrieve the logging services: %s", err))
26
+		return
27
+	}
28
+	foundServices := sets.NewString()
29
+	for _, service := range serviceList.Items {
30
+		foundServices.Insert(service.ObjectMeta.Name)
31
+		r.Debug("AGL0210", fmt.Sprintf("Retrieved service '%s'", service.ObjectMeta.Name))
32
+	}
33
+	for _, service := range loggingServices.List() {
34
+		if foundServices.Has(service) {
35
+			checkServiceEndpoints(r, adapter, project, service)
36
+		} else {
37
+			if strings.Contains(service, "-ops") {
38
+				r.Warn("AGL0215", nil, fmt.Sprintf(serviceOpsNotFound, service))
39
+			} else {
40
+				r.Error("AGL0217", nil, fmt.Sprintf(serviceNotFound, service))
41
+			}
42
+		}
43
+	}
44
+}
45
+
46
+// checkServiceEndpoints validates if there is an available endpoint for the service.
47
+func checkServiceEndpoints(r diagnosticReporter, adapter servicesAdapter, project string, service string) {
48
+	endpoints, err := adapter.endpointsForService(project, service)
49
+	if err != nil {
50
+		r.Warn("AGL0220", err, fmt.Sprintf("Unable to retrieve endpoints for service '%s': %s", service, err))
51
+		return
52
+	}
53
+	if len(endpoints.Subsets) == 0 {
54
+		if strings.Contains(service, "-ops") {
55
+			r.Info("AGL0223", fmt.Sprintf("There are no endpoints found for service '%s'. This could mean you chose not to install a separate operations cluster during installation.", service))
56
+		} else {
57
+			r.Warn("AGL0225", nil, fmt.Sprintf("There are no endpoints found for service '%s'. This means no pods are backing this service and the component is not functioning.", service))
58
+		}
59
+	}
60
+}
0 61
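
The reporting levels in checkServices and checkServiceEndpoints follow a small decision table: trouble with a "-ops" service is downgraded because the separate operations stack is optional, while the same trouble on a core service is not. A sketch of that table as a pure function; the function and its name are illustrative, not part of the diagnostic:

    package main

    import (
    	"fmt"
    	"strings"
    )

    // serviceSeverity condenses the level selection used by checkServices and
    // checkServiceEndpoints: "-ops" components degrade to info/warn, core
    // components to warn/error.
    func serviceSeverity(name string, found bool, hasEndpoints bool) string {
    	ops := strings.Contains(name, "-ops")
    	switch {
    	case !found && ops:
    		return "warn" // AGL0215
    	case !found:
    		return "error" // AGL0217
    	case !hasEndpoints && ops:
    		return "info" // AGL0223
    	case !hasEndpoints:
    		return "warn" // AGL0225
    	default:
    		return "ok"
    	}
    }

    func main() {
    	fmt.Println(serviceSeverity("logging-kibana-ops", true, false)) // info
    	fmt.Println(serviceSeverity("logging-es", false, false))        // error
    }
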
new file mode 100644
... ...
@@ -0,0 +1,104 @@
0
+package aggregated_logging
1
+
2
+import (
3
+	"errors"
4
+	"testing"
5
+
6
+	"github.com/openshift/origin/pkg/diagnostics/log"
7
+	kapi "k8s.io/kubernetes/pkg/api"
8
+)
9
+
10
+type fakeServicesDiagnostic struct {
11
+	list kapi.ServiceList
12
+	fakeDiagnostic
13
+	endpoints   map[string]kapi.Endpoints
14
+	endpointErr error
15
+}
16
+
17
+func newFakeServicesDiagnostic(t *testing.T) *fakeServicesDiagnostic {
18
+	return &fakeServicesDiagnostic{
19
+		list:           kapi.ServiceList{},
20
+		fakeDiagnostic: *newFakeDiagnostic(t),
21
+		endpoints:      map[string]kapi.Endpoints{},
22
+	}
23
+}
24
+
25
+func (f *fakeServicesDiagnostic) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
26
+	if f.err != nil {
27
+		return &f.list, f.err
28
+	}
29
+	return &f.list, nil
30
+}
31
+func (f *fakeServicesDiagnostic) endpointsForService(project string, service string) (*kapi.Endpoints, error) {
32
+	if f.endpointErr != nil {
33
+		return nil, f.endpointErr
34
+	}
35
+	endpoints := f.endpoints[service]
36
+	return &endpoints, nil
37
+}
38
+
39
+func (f *fakeServicesDiagnostic) addEndpointSubsetTo(service string) {
40
+	endpoints := kapi.Endpoints{}
41
+	endpoints.Subsets = []kapi.EndpointSubset{{}}
42
+	f.endpoints[service] = endpoints
43
+}
44
+
45
+func (f *fakeServicesDiagnostic) addServiceNamed(name string) {
46
+	meta := kapi.ObjectMeta{Name: name}
47
+	f.list.Items = append(f.list.Items, kapi.Service{ObjectMeta: meta})
48
+}
49
+
50
+// test error from client
51
+func TestCheckingServicesWhenFailedResponseFromClient(t *testing.T) {
52
+	d := newFakeServicesDiagnostic(t)
53
+	d.err = errors.New("an error")
54
+	checkServices(d, d, fakeProject)
55
+	d.assertMessage("AGL0205",
56
+		"Exp an error when unable to retrieve services because of a client error",
57
+		log.ErrorLevel)
58
+}
59
+
60
+func TestCheckingServicesWhenMissingServices(t *testing.T) {
61
+	d := newFakeServicesDiagnostic(t)
62
+	d.addServiceNamed("logging-es")
63
+
64
+	checkServices(d, d, fakeProject)
65
+	d.assertMessage("AGL0215",
66
+		"Exp a warning when an expected service is not found",
67
+		log.WarnLevel)
68
+}
69
+
70
+func TestCheckingServicesWarnsWhenRetrievingEndpointsErrors(t *testing.T) {
71
+	d := newFakeServicesDiagnostic(t)
72
+	d.addServiceNamed("logging-es")
73
+	d.endpointErr = errors.New("an endpoint error")
74
+
75
+	checkServices(d, d, fakeProject)
76
+	d.assertMessage("AGL0220",
77
+		"Exp a warning when there is an error retrieving endpoints for a service",
78
+		log.WarnLevel)
79
+}
80
+
81
+func TestCheckingServicesWarnsWhenServiceHasNoEndpoints(t *testing.T) {
82
+	d := newFakeServicesDiagnostic(t)
83
+	for _, service := range loggingServices.List() {
84
+		d.addServiceNamed(service)
85
+	}
86
+
87
+	checkServices(d, d, fakeProject)
88
+	d.assertMessage("AGL0225",
89
+		"Exp a warning when an expected service has no endpoints",
90
+		log.WarnLevel)
91
+}
92
+
93
+func TestCheckingServicesHasNoErrorsOrWarningsForExpServices(t *testing.T) {
94
+	d := newFakeServicesDiagnostic(t)
95
+	for _, service := range loggingServices.List() {
96
+		d.addServiceNamed(service)
97
+		d.addEndpointSubsetTo(service)
98
+	}
99
+
100
+	checkServices(d, d, fakeProject)
101
+	d.assertNoErrors()
102
+	d.assertNoWarnings()
103
+}