@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
 are skipped.
 .PP
 Diagnostics may be individually run by passing diagnostic name as arguments.
+
+.PP
+.RS
+
+.nf
+oadm diagnostics <DiagnosticName>
+
+.fi
+.RE
+
+.PP
 The available diagnostic names are:
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
 
 
 .SH OPTIONS
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
 are skipped.
 .PP
 Diagnostics may be individually run by passing diagnostic name as arguments.
+
+.PP
+.RS
+
+.nf
+oc adm diagnostics <DiagnosticName>
+
+.fi
+.RE
+
+.PP
 The available diagnostic names are:
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
 
 
 .SH OPTIONS
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
 are skipped.
 .PP
 Diagnostics may be individually run by passing diagnostic name as arguments.
+
+.PP
+.RS
+
+.nf
+openshift admin diagnostics <DiagnosticName>
+
+.fi
+.RE
+
+.PP
 The available diagnostic names are:
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
 
 
 .SH OPTIONS
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
 are skipped.
 .PP
 Diagnostics may be individually run by passing diagnostic name as arguments.
+
+.PP
+.RS
+
+.nf
+openshift cli adm diagnostics <DiagnosticName>
+
+.fi
+.RE
+
+.PP
 The available diagnostic names are:
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
 
 
 .SH OPTIONS
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
 are skipped.
 .PP
 Diagnostics may be individually run by passing diagnostic name as arguments.
+
+.PP
+.RS
+
+.nf
+openshift ex diagnostics <DiagnosticName>
+
+.fi
+.RE
+
+.PP
 The available diagnostic names are:
-AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
+AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
 
 
 .SH OPTIONS
@@ -14,13 +14,24 @@ import (
 	"github.com/openshift/origin/pkg/client"
 	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
 	clustdiags "github.com/openshift/origin/pkg/diagnostics/cluster"
+	agldiags "github.com/openshift/origin/pkg/diagnostics/cluster/aggregated_logging"
 	"github.com/openshift/origin/pkg/diagnostics/types"
 )
 
 var (
 	// availableClusterDiagnostics contains the names of cluster diagnostics that can be executed
 	// during a single run of diagnostics. Add more diagnostics to the list as they are defined.
-	availableClusterDiagnostics = sets.NewString(clustdiags.NodeDefinitionsName, clustdiags.ClusterRegistryName, clustdiags.ClusterRouterName, clustdiags.ClusterRolesName, clustdiags.ClusterRoleBindingsName, clustdiags.MasterNodeName, clustdiags.MetricsApiProxyName, clustdiags.ServiceExternalIPsName)
+	availableClusterDiagnostics = sets.NewString(
+		agldiags.AggregatedLoggingName,
+		clustdiags.ClusterRegistryName,
+		clustdiags.ClusterRouterName,
+		clustdiags.ClusterRolesName,
+		clustdiags.ClusterRoleBindingsName,
+		clustdiags.MasterNodeName,
+		clustdiags.MetricsApiProxyName,
+		clustdiags.NodeDefinitionsName,
+		clustdiags.ServiceExternalIPsName,
+	)
 )
 
 // buildClusterDiagnostics builds cluster Diagnostic objects if a cluster-admin client can be extracted from the rawConfig passed in.
@@ -46,6 +57,8 @@ func (o DiagnosticsOptions) buildClusterDiagnostics(rawConfig *clientcmdapi.Conf
 	for _, diagnosticName := range requestedDiagnostics {
 		var d types.Diagnostic
 		switch diagnosticName {
+		case agldiags.AggregatedLoggingName:
+			d = agldiags.NewAggregatedLogging(o.MasterConfigLocation, kclusterClient, clusterClient)
 		case clustdiags.NodeDefinitionsName:
 			d = &clustdiags.NodeDefinitions{KubeClient: kclusterClient, OsClient: clusterClient}
 		case clustdiags.MasterNodeName:
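The set registered above is what gates which names may be requested on the command line; the filtering code itself is not part of this hunk, so the following standalone sketch only illustrates the membership check implied by the comment ("diagnostics that can be executed during a single run"). The program and the trimmed name list are illustrative, not code from this patch.

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/util/sets"
)

func main() {
	// Mirrors how availableClusterDiagnostics is used: a requested diagnostic
	// name is only dispatched if it is a member of the known set.
	available := sets.NewString("AggregatedLogging", "ClusterRegistry", "ClusterRouter")
	for _, requested := range []string{"AggregatedLogging", "NotADiagnostic"} {
		fmt.Printf("%s known: %v\n", requested, available.Has(requested))
	}
}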
@@ -95,7 +95,7 @@ save may be your own.
 
 A diagnostic is an object that conforms to the Diagnostic interface
 (see pkg/diagnostics/types/diagnostic.go). The diagnostic object should
-be built in one of the builders in the pkg/cmd/experimental/diagnostics
+be built in one of the builders in the pkg/cmd/admin/diagnostics
 package (based on whether it depends on client, cluster-admin, or host
 configuration). When executed, the diagnostic logs its findings into
 a result object. It should be assumed that they may run in parallel.
new file mode 100644 |
@@ -0,0 +1,41 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ |
|
| 5 |
+ "k8s.io/kubernetes/pkg/apis/rbac" |
|
| 6 |
+ "k8s.io/kubernetes/pkg/util/sets" |
|
| 7 |
+) |
|
| 8 |
+ |
|
| 9 |
+const clusterReaderRoleBindingName = "cluster-readers" |
|
| 10 |
+ |
|
| 11 |
+var clusterReaderRoleBindingNames = sets.NewString(fluentdServiceAccountName) |
|
| 12 |
+ |
|
| 13 |
+const clusterReaderUnboundServiceAccount = ` |
|
| 14 |
+The ServiceAccount '%[1]s' is not a cluster-reader in the '%[2]s' project. This |
|
| 15 |
+is required to enable Fluentd to look up pod metadata for the logs it gathers. |
|
| 16 |
+As a user with a cluster-admin role, you can grant the permissions by running |
|
| 17 |
+the following: |
|
| 18 |
+ |
|
| 19 |
+ oadm policy add-cluster-role-to-user cluster-reader system:serviceaccount:%[2]s:%[1]s |
|
| 20 |
+` |
|
| 21 |
+ |
|
| 22 |
+func checkClusterRoleBindings(r diagnosticReporter, adapter clusterRoleBindingsAdapter, project string) {
|
|
| 23 |
+ r.Debug("AGL0600", "Checking ClusterRoleBindings...")
|
|
| 24 |
+ crb, err := adapter.getClusterRoleBinding(clusterReaderRoleBindingName) |
|
| 25 |
+ if err != nil {
|
|
| 26 |
+ r.Error("AGL0605", err, fmt.Sprintf("There was an error while trying to retrieve the ClusterRoleBindings for the logging stack: %s", err))
|
|
| 27 |
+ return |
|
| 28 |
+ } |
|
| 29 |
+ boundServiceAccounts := sets.NewString() |
|
| 30 |
+ for _, subject := range crb.Subjects {
|
|
| 31 |
+ if subject.Kind == rbac.ServiceAccountKind && subject.Namespace == project {
|
|
| 32 |
+ boundServiceAccounts.Insert(subject.Name) |
|
| 33 |
+ } |
|
| 34 |
+ } |
|
| 35 |
+ for _, name := range clusterReaderRoleBindingNames.List() {
|
|
| 36 |
+ if !boundServiceAccounts.Has(name) {
|
|
| 37 |
+ r.Error("AGL0610", nil, fmt.Sprintf(clusterReaderUnboundServiceAccount, name, project))
|
|
| 38 |
+ } |
|
| 39 |
+ } |
|
| 40 |
+} |
| 0 | 41 |
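Note the split between the diagnosticReporter and the adapter parameters in checkClusterRoleBindings: in the real diagnostic the same object plays both roles, while the unit tests substitute a fake for both. A compilable sketch of the call shape, assuming it sits in this package (the wrapper function name is illustrative only):

package aggregated_logging

// exampleRunClusterRoleBindingCheck mirrors how Check() in diagnostics.go
// (later in this patch) drives this check: the AggregatedLogging object is
// passed as both the reporter and the clusterRoleBindingsAdapter.
func exampleRunClusterRoleBindingCheck(d *AggregatedLogging, project string) {
	checkClusterRoleBindings(d, d, project)
}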
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,70 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "testing" |
|
| 5 |
+ |
|
| 6 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 7 |
+ "k8s.io/kubernetes/pkg/apis/rbac" |
|
| 8 |
+ |
|
| 9 |
+ authapi "github.com/openshift/origin/pkg/authorization/api" |
|
| 10 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 11 |
+) |
|
| 12 |
+ |
|
| 13 |
+type fakeRoleBindingDiagnostic struct {
|
|
| 14 |
+ fakeDiagnostic |
|
| 15 |
+ fakeClusterRoleBinding authapi.ClusterRoleBinding |
|
| 16 |
+} |
|
| 17 |
+ |
|
| 18 |
+func newFakeRoleBindingDiagnostic(t *testing.T) *fakeRoleBindingDiagnostic {
|
|
| 19 |
+ return &fakeRoleBindingDiagnostic{
|
|
| 20 |
+ fakeDiagnostic: *newFakeDiagnostic(t), |
|
| 21 |
+ } |
|
| 22 |
+} |
|
| 23 |
+ |
|
| 24 |
+func (f *fakeRoleBindingDiagnostic) getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) {
|
|
| 25 |
+ if f.err != nil {
|
|
| 26 |
+ return nil, f.err |
|
| 27 |
+ } |
|
| 28 |
+ return &f.fakeClusterRoleBinding, nil |
|
| 29 |
+} |
|
| 30 |
+func (f *fakeRoleBindingDiagnostic) addBinding(name string, namespace string) {
|
|
| 31 |
+ ref := kapi.ObjectReference{
|
|
| 32 |
+ Name: name, |
|
| 33 |
+ Kind: rbac.ServiceAccountKind, |
|
| 34 |
+ Namespace: namespace, |
|
| 35 |
+ } |
|
| 36 |
+ f.fakeClusterRoleBinding.Subjects = append(f.fakeClusterRoleBinding.Subjects, ref) |
|
| 37 |
+} |
|
| 38 |
+ |
|
| 39 |
+//test error when client error |
|
| 40 |
+func TestCheckClusterRoleBindingsWhenErrorFromClientRetrievingRoles(t *testing.T) {
|
|
| 41 |
+ d := newFakeRoleBindingDiagnostic(t) |
|
| 42 |
+ d.err = errors.New("client error")
|
|
| 43 |
+ |
|
| 44 |
+ checkClusterRoleBindings(d, d, fakeProject) |
|
| 45 |
+ |
|
| 46 |
+ d.assertMessage("AGL0605", "Exp. an error message if client error retrieving ClusterRoleBindings", log.ErrorLevel)
|
|
| 47 |
+ d.dumpMessages() |
|
| 48 |
+} |
|
| 49 |
+ |
|
| 50 |
+func TestCheckClusterRoleBindingsWhenClusterReaderIsNotInProject(t *testing.T) {
|
|
| 51 |
+ d := newFakeRoleBindingDiagnostic(t) |
|
| 52 |
+ d.addBinding("someName", "someRandomProject")
|
|
| 53 |
+ d.addBinding(fluentdServiceAccountName, fakeProject) |
|
| 54 |
+ |
|
| 55 |
+ checkClusterRoleBindings(d, d, fakeProject) |
|
| 56 |
+ |
|
| 57 |
+ d.assertNoErrors() |
|
| 58 |
+ d.dumpMessages() |
|
| 59 |
+} |
|
| 60 |
+ |
|
| 61 |
+func TestCheckClusterRoleBindingsWhenUnboundServiceAccounts(t *testing.T) {
|
|
| 62 |
+ d := newFakeRoleBindingDiagnostic(t) |
|
| 63 |
+ d.addBinding(fluentdServiceAccountName, "someRandomProject") |
|
| 64 |
+ |
|
| 65 |
+ checkClusterRoleBindings(d, d, fakeProject) |
|
| 66 |
+ |
|
| 67 |
+ d.assertMessage("AGL0610", "Exp. an error when the exp service-accounts dont have cluster-reader access", log.ErrorLevel)
|
|
| 68 |
+ d.dumpMessages() |
|
| 69 |
+} |
| 0 | 70 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,118 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ |
|
| 5 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 6 |
+ kapisext "k8s.io/kubernetes/pkg/apis/extensions" |
|
| 7 |
+ "k8s.io/kubernetes/pkg/labels" |
|
| 8 |
+) |
|
| 9 |
+ |
|
| 10 |
+const daemonSetNoLabeledNodes = ` |
|
| 11 |
+There are no nodes that match the selector for DaemonSet '%[1]s'. This |
|
| 12 |
+means Fluentd is not running and is not gathering logs from any nodes. |
|
| 13 |
+An example of a command to target a specific node for this DaemonSet: |
|
| 14 |
+ |
|
| 15 |
+ oc label node/node1.example.com %[2]s |
|
| 16 |
+ |
|
| 17 |
+or to label them all: |
|
| 18 |
+ |
|
| 19 |
+ oc label node --all %[2]s |
|
| 20 |
+` |
|
| 21 |
+ |
|
| 22 |
+const daemonSetPartialNodesLabeled = ` |
|
| 23 |
+There are some nodes that match the selector for DaemonSet '%s'. |
|
| 24 |
+A list of matching nodes can be discovered by running: |
|
| 25 |
+ |
|
| 26 |
+ oc get nodes -l %s |
|
| 27 |
+` |
|
| 28 |
+const daemonSetNoPodsFound = ` |
|
| 29 |
+There were no pods found that match DaemonSet '%s' with matchLabels '%s' |
|
| 30 |
+` |
|
| 31 |
+const daemonSetPodsNotRunning = ` |
|
| 32 |
+The Pod '%[1]s' matched by DaemonSet '%[2]s' is not in '%[3]s' status: %[4]s. |
|
| 33 |
+ |
|
| 34 |
+Depending upon the state, this could mean there is an error running the image |
|
| 35 |
+for one or more pod containers, the node could be pulling images, etc. Try running |
|
| 36 |
+the following commands to get additional information: |
|
| 37 |
+ |
|
| 38 |
+ oc describe pod %[1]s -n %[5]s |
|
| 39 |
+ oc logs %[1]s -n %[5]s |
|
| 40 |
+ oc get events -n %[5]s |
|
| 41 |
+` |
|
| 42 |
+const daemonSetNotFound = ` |
|
| 43 |
+There were no DaemonSets in project '%s' that included label '%s'. This implies |
|
| 44 |
+the Fluentd pods are not deployed or the logging stack needs to be upgraded. Try |
|
| 45 |
+running the installer to upgrade the logging stack. |
|
| 46 |
+` |
|
| 47 |
+ |
|
| 48 |
+var loggingInfraFluentdSelector = labels.Set{loggingInfraKey: "fluentd"}
|
|
| 49 |
+ |
|
| 50 |
+func checkDaemonSets(r diagnosticReporter, adapter daemonsetAdapter, project string) {
|
|
| 51 |
+ r.Debug("AGL0400", fmt.Sprintf("Checking DaemonSets in project '%s'...", project))
|
|
| 52 |
+ dsList, err := adapter.daemonsets(project, kapi.ListOptions{LabelSelector: loggingInfraFluentdSelector.AsSelector()})
|
|
| 53 |
+ if err != nil {
|
|
| 54 |
+ r.Error("AGL0405", err, fmt.Sprintf("There was an error while trying to retrieve the logging DaemonSets in project '%s' which is most likely transient: %s", project, err))
|
|
| 55 |
+ return |
|
| 56 |
+ } |
|
| 57 |
+ if len(dsList.Items) == 0 {
|
|
| 58 |
+ r.Error("AGL0407", err, fmt.Sprintf(daemonSetNotFound, project, loggingInfraFluentdSelector.AsSelector()))
|
|
| 59 |
+ return |
|
| 60 |
+ } |
|
| 61 |
+ nodeList, err := adapter.nodes(kapi.ListOptions{})
|
|
| 62 |
+ if err != nil {
|
|
| 63 |
+ r.Error("AGL0410", err, fmt.Sprintf("There was an error while trying to retrieve the list of Nodes which is most likely transient: %s", err))
|
|
| 64 |
+ return |
|
| 65 |
+ } |
|
| 66 |
+ for _, ds := range dsList.Items {
|
|
| 67 |
+ labeled := 0 |
|
| 68 |
+ nodeSelector := labels.Set(ds.Spec.Template.Spec.NodeSelector).AsSelector() |
|
| 69 |
+ r.Debug("AGL0415", fmt.Sprintf("Checking DaemonSet '%s' nodeSelector '%s'", ds.ObjectMeta.Name, nodeSelector))
|
|
| 70 |
+ for _, node := range nodeList.Items {
|
|
| 71 |
+ if nodeSelector.Matches(labels.Set(node.Labels)) {
|
|
| 72 |
+ labeled = labeled + 1 |
|
| 73 |
+ } |
|
| 74 |
+ } |
|
| 75 |
+ switch {
|
|
| 76 |
+ case labeled == 0: |
|
| 77 |
+ r.Error("AGL0420", nil, fmt.Sprintf(daemonSetNoLabeledNodes, ds.ObjectMeta.Name, nodeSelector))
|
|
| 78 |
+ break |
|
| 79 |
+ case labeled < len(nodeList.Items): |
|
| 80 |
+ r.Warn("AGL0425", nil, fmt.Sprintf(daemonSetPartialNodesLabeled, ds.ObjectMeta.Name, nodeSelector))
|
|
| 81 |
+ break |
|
| 82 |
+ default: |
|
| 83 |
+ r.Debug("AGL0430", fmt.Sprintf("DaemonSet '%s' matches all nodes", ds.ObjectMeta.Name))
|
|
| 84 |
+ } |
|
| 85 |
+ if labeled > 0 {
|
|
| 86 |
+ checkDaemonSetPods(r, adapter, ds, project, labeled) |
|
| 87 |
+ } |
|
| 88 |
+ } |
|
| 89 |
+} |
|
| 90 |
+ |
|
| 91 |
+func checkDaemonSetPods(r diagnosticReporter, adapter daemonsetAdapter, ds kapisext.DaemonSet, project string, numLabeledNodes int) {
|
|
| 92 |
+ if ds.Spec.Selector == nil {
|
|
| 93 |
+ r.Debug("AGL0455", "DaemonSet selector is nil. Unable to verify a pod is running")
|
|
| 94 |
+ return |
|
| 95 |
+ } |
|
| 96 |
+ podSelector := labels.Set(ds.Spec.Selector.MatchLabels).AsSelector() |
|
| 97 |
+ r.Debug("AGL0435", fmt.Sprintf("Checking for running pods for DaemonSet '%s' with matchLabels '%s'", ds.ObjectMeta.Name, podSelector))
|
|
| 98 |
+ podList, err := adapter.pods(project, kapi.ListOptions{LabelSelector: podSelector})
|
|
| 99 |
+ if err != nil {
|
|
| 100 |
+ r.Error("AGL0438", err, fmt.Sprintf("There was an error retrieving pods matched to DaemonSet '%s' that is most likely transient: %s", ds.ObjectMeta.Name, err))
|
|
| 101 |
+ return |
|
| 102 |
+ } |
|
| 103 |
+ if len(podList.Items) == 0 {
|
|
| 104 |
+ r.Error("AGL0440", nil, fmt.Sprintf(daemonSetNoPodsFound, ds.ObjectMeta.Name, podSelector))
|
|
| 105 |
+ return |
|
| 106 |
+ } |
|
| 107 |
+ if len(podList.Items) != numLabeledNodes {
|
|
| 108 |
+ r.Error("AGL0443", nil, fmt.Sprintf("The number of deployed pods %s does not match the number of labeled nodes %d", len(podList.Items), numLabeledNodes))
|
|
| 109 |
+ } |
|
| 110 |
+ for _, pod := range podList.Items {
|
|
| 111 |
+ if pod.Status.Phase != kapi.PodRunning {
|
|
| 112 |
+ podName := pod.ObjectMeta.Name |
|
| 113 |
+ r.Error("AGL0445", nil, fmt.Sprintf(daemonSetPodsNotRunning, podName, ds.ObjectMeta.Name, kapi.PodRunning, pod.Status.Phase, project))
|
|
| 114 |
+ } |
|
| 115 |
+ |
|
| 116 |
+ } |
|
| 117 |
+} |
| 0 | 118 |
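The node-counting loop above relies on the labels package to evaluate the DaemonSet's nodeSelector against each node's labels. A standalone illustration of that matching step, using the same calls as checkDaemonSets; the label keys and values here are made up:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/labels"
)

func main() {
	// Hypothetical nodeSelector and node labels: Set -> Selector, then
	// Selector.Matches(labels), exactly as in the check above.
	nodeSelector := labels.Set{"logging-infra-fluentd": "true"}.AsSelector()

	matchingNode := labels.Set{"logging-infra-fluentd": "true", "region": "infra"}
	otherNode := labels.Set{"region": "primary"}

	fmt.Println("labeled node matches:", nodeSelector.Matches(matchingNode)) // true
	fmt.Println("other node matches:", nodeSelector.Matches(otherNode))      // false
}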
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,188 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "testing" |
|
| 5 |
+ |
|
| 6 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 7 |
+ "k8s.io/kubernetes/pkg/api/unversioned" |
|
| 8 |
+ kapisext "k8s.io/kubernetes/pkg/apis/extensions" |
|
| 9 |
+ |
|
| 10 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 11 |
+) |
|
| 12 |
+ |
|
| 13 |
+const ( |
|
| 14 |
+ testPodsKey = "pods" |
|
| 15 |
+ testNodesKey = "nodes" |
|
| 16 |
+ testDsKey = "daemonsets" |
|
| 17 |
+) |
|
| 18 |
+ |
|
| 19 |
+type fakeDaemonSetDiagnostic struct {
|
|
| 20 |
+ fakeDiagnostic |
|
| 21 |
+ fakePods kapi.PodList |
|
| 22 |
+ fakeNodes kapi.NodeList |
|
| 23 |
+ fakeDaemonsets kapisext.DaemonSetList |
|
| 24 |
+ clienterrors map[string]error |
|
| 25 |
+} |
|
| 26 |
+ |
|
| 27 |
+func newFakeDaemonSetDiagnostic(t *testing.T) *fakeDaemonSetDiagnostic {
|
|
| 28 |
+ return &fakeDaemonSetDiagnostic{
|
|
| 29 |
+ fakeDiagnostic: *newFakeDiagnostic(t), |
|
| 30 |
+ clienterrors: map[string]error{},
|
|
| 31 |
+ } |
|
| 32 |
+} |
|
| 33 |
+ |
|
| 34 |
+func (f *fakeDaemonSetDiagnostic) addDsPodWithPhase(state kapi.PodPhase) {
|
|
| 35 |
+ pod := kapi.Pod{
|
|
| 36 |
+ Spec: kapi.PodSpec{},
|
|
| 37 |
+ Status: kapi.PodStatus{
|
|
| 38 |
+ Phase: state, |
|
| 39 |
+ }, |
|
| 40 |
+ } |
|
| 41 |
+ f.fakePods.Items = append(f.fakePods.Items, pod) |
|
| 42 |
+} |
|
| 43 |
+ |
|
| 44 |
+func (f *fakeDaemonSetDiagnostic) addDaemonSetWithSelector(key string, value string) {
|
|
| 45 |
+ selector := map[string]string{key: value}
|
|
| 46 |
+ ds := kapisext.DaemonSet{
|
|
| 47 |
+ Spec: kapisext.DaemonSetSpec{
|
|
| 48 |
+ Template: kapi.PodTemplateSpec{
|
|
| 49 |
+ Spec: kapi.PodSpec{
|
|
| 50 |
+ NodeSelector: selector, |
|
| 51 |
+ }, |
|
| 52 |
+ }, |
|
| 53 |
+ Selector: &unversioned.LabelSelector{MatchLabels: selector},
|
|
| 54 |
+ }, |
|
| 55 |
+ } |
|
| 56 |
+ f.fakeDaemonsets.Items = append(f.fakeDaemonsets.Items, ds) |
|
| 57 |
+} |
|
| 58 |
+ |
|
| 59 |
+func (f *fakeDaemonSetDiagnostic) addNodeWithLabel(key string, value string) {
|
|
| 60 |
+ labels := map[string]string{key: value}
|
|
| 61 |
+ node := kapi.Node{
|
|
| 62 |
+ ObjectMeta: kapi.ObjectMeta{
|
|
| 63 |
+ Labels: labels, |
|
| 64 |
+ }, |
|
| 65 |
+ } |
|
| 66 |
+ f.fakeNodes.Items = append(f.fakeNodes.Items, node) |
|
| 67 |
+} |
|
| 68 |
+ |
|
| 69 |
+func (f *fakeDaemonSetDiagnostic) daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) {
|
|
| 70 |
+ value, ok := f.clienterrors[testDsKey] |
|
| 71 |
+ if ok {
|
|
| 72 |
+ return nil, value |
|
| 73 |
+ } |
|
| 74 |
+ return &f.fakeDaemonsets, nil |
|
| 75 |
+} |
|
| 76 |
+ |
|
| 77 |
+func (f *fakeDaemonSetDiagnostic) nodes(options kapi.ListOptions) (*kapi.NodeList, error) {
|
|
| 78 |
+ value, ok := f.clienterrors[testNodesKey] |
|
| 79 |
+ if ok {
|
|
| 80 |
+ return nil, value |
|
| 81 |
+ } |
|
| 82 |
+ return &f.fakeNodes, nil |
|
| 83 |
+} |
|
| 84 |
+ |
|
| 85 |
+func (f *fakeDaemonSetDiagnostic) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
|
|
| 86 |
+ value, ok := f.clienterrors[testPodsKey] |
|
| 87 |
+ if ok {
|
|
| 88 |
+ return nil, value |
|
| 89 |
+ } |
|
| 90 |
+ return &f.fakePods, nil |
|
| 91 |
+} |
|
| 92 |
+ |
|
| 93 |
+func TestCheckDaemonsetsWhenErrorResponseFromClientRetrievingDaemonsets(t *testing.T) {
|
|
| 94 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 95 |
+ d.clienterrors[testDsKey] = errors.New("someerror")
|
|
| 96 |
+ |
|
| 97 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 98 |
+ |
|
| 99 |
+ d.assertMessage("AGL0405", "Exp. error when client errors on retrieving DaemonSets", log.ErrorLevel)
|
|
| 100 |
+} |
|
| 101 |
+ |
|
| 102 |
+func TestCheckDaemonsetsWhenNoDaemonsetsFound(t *testing.T) {
|
|
| 103 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 104 |
+ |
|
| 105 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 106 |
+ |
|
| 107 |
+ d.assertMessage("AGL0407", "Exp. error when client retrieves no DaemonSets", log.ErrorLevel)
|
|
| 108 |
+} |
|
| 109 |
+ |
|
| 110 |
+func TestCheckDaemonsetsWhenErrorResponseFromClientRetrievingNodes(t *testing.T) {
|
|
| 111 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 112 |
+ d.clienterrors[testNodesKey] = errors.New("someerror")
|
|
| 113 |
+ d.addDaemonSetWithSelector("foo", "bar")
|
|
| 114 |
+ |
|
| 115 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 116 |
+ |
|
| 117 |
+ d.assertMessage("AGL0410", "Exp. error when client errors on retrieving Nodes", log.ErrorLevel)
|
|
| 118 |
+} |
|
| 119 |
+ |
|
| 120 |
+func TestCheckDaemonsetsWhenDaemonsetsMatchNoNodes(t *testing.T) {
|
|
| 121 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 122 |
+ d.addDaemonSetWithSelector("foo", "bar")
|
|
| 123 |
+ d.addNodeWithLabel("foo", "xyz")
|
|
| 124 |
+ |
|
| 125 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 126 |
+ |
|
| 127 |
+ d.assertMessage("AGL0420", "Exp. error when daemonsets do not match any nodes", log.ErrorLevel)
|
|
| 128 |
+} |
|
| 129 |
+ |
|
| 130 |
+func TestCheckDaemonsetsWhenDaemonsetsMatchPartialNodes(t *testing.T) {
|
|
| 131 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 132 |
+ d.addDaemonSetWithSelector("foo", "bar")
|
|
| 133 |
+ d.addNodeWithLabel("foo", "bar")
|
|
| 134 |
+ d.addNodeWithLabel("foo", "xyz")
|
|
| 135 |
+ |
|
| 136 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 137 |
+ |
|
| 138 |
+ d.assertMessage("AGL0425", "Exp. warning when daemonsets matches less then all the nodes", log.WarnLevel)
|
|
| 139 |
+} |
|
| 140 |
+ |
|
| 141 |
+func TestCheckDaemonsetsWhenClientErrorsFetchingPods(t *testing.T) {
|
|
| 142 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 143 |
+ d.clienterrors[testPodsKey] = errors.New("some error")
|
|
| 144 |
+ d.addDaemonSetWithSelector("foo", "bar")
|
|
| 145 |
+ d.addNodeWithLabel("foo", "bar")
|
|
| 146 |
+ |
|
| 147 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 148 |
+ |
|
| 149 |
+ d.assertMessage("AGL0438", "Exp. error when there is an error retrieving pods for a daemonset", log.ErrorLevel)
|
|
| 150 |
+ |
|
| 151 |
+ d.dumpMessages() |
|
| 152 |
+} |
|
| 153 |
+ |
|
| 154 |
+func TestCheckDaemonsetsWhenNoPodsMatchDaemonSet(t *testing.T) {
|
|
| 155 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 156 |
+ d.addDaemonSetWithSelector("foo", "bar")
|
|
| 157 |
+ d.addNodeWithLabel("foo", "bar")
|
|
| 158 |
+ |
|
| 159 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 160 |
+ |
|
| 161 |
+ d.assertMessage("AGL0440", "Exp. error when there are no pods that match a daemonset", log.ErrorLevel)
|
|
| 162 |
+ d.dumpMessages() |
|
| 163 |
+} |
|
| 164 |
+ |
|
| 165 |
+func TestCheckDaemonsetsWhenNoPodsInRunningState(t *testing.T) {
|
|
| 166 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 167 |
+ d.addDaemonSetWithSelector("foo", "bar")
|
|
| 168 |
+ d.addNodeWithLabel("foo", "bar")
|
|
| 169 |
+ d.addDsPodWithPhase(kapi.PodPending) |
|
| 170 |
+ |
|
| 171 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 172 |
+ |
|
| 173 |
+ d.assertMessage("AGL0445", "Exp. error when there are no pods in running state", log.ErrorLevel)
|
|
| 174 |
+ d.dumpMessages() |
|
| 175 |
+} |
|
| 176 |
+ |
|
| 177 |
+func TestCheckDaemonsetsWhenAllPodsInRunningState(t *testing.T) {
|
|
| 178 |
+ d := newFakeDaemonSetDiagnostic(t) |
|
| 179 |
+ d.addDaemonSetWithSelector("foo", "bar")
|
|
| 180 |
+ d.addNodeWithLabel("foo", "bar")
|
|
| 181 |
+ d.addDsPodWithPhase(kapi.PodRunning) |
|
| 182 |
+ |
|
| 183 |
+ checkDaemonSets(d, d, fakeProject) |
|
| 184 |
+ |
|
| 185 |
+ d.assertNoErrors() |
|
| 186 |
+ d.dumpMessages() |
|
| 187 |
+} |
| 0 | 188 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,128 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "strings" |
|
| 5 |
+ |
|
| 6 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 7 |
+ "k8s.io/kubernetes/pkg/labels" |
|
| 8 |
+ "k8s.io/kubernetes/pkg/selection" |
|
| 9 |
+ "k8s.io/kubernetes/pkg/util/sets" |
|
| 10 |
+ |
|
| 11 |
+ deployapi "github.com/openshift/origin/pkg/deploy/api" |
|
| 12 |
+) |
|
| 13 |
+ |
|
| 14 |
+const ( |
|
| 15 |
+ componentNameEs = "es" |
|
| 16 |
+ componentNameEsOps = "es-ops" |
|
| 17 |
+ componentNameKibana = "kibana" |
|
| 18 |
+ componentNameKibanaOps = "kibana-ops" |
|
| 19 |
+ componentNameCurator = "curator" |
|
| 20 |
+) |
|
| 21 |
+ |
|
| 22 |
+// loggingComponents are those 'managed' by rep controllers (e.g. fluentd is deployed with a DaemonSet) |
|
| 23 |
+var loggingComponents = sets.NewString(componentNameEs, componentNameEsOps, componentNameKibana, componentNameKibanaOps, componentNameCurator) |
|
| 24 |
+ |
|
| 25 |
+const deploymentConfigWarnMissingForOps = ` |
|
| 26 |
+Did not find a DeploymentConfig to support component '%s'. If you require |
|
| 27 |
+a separate ElasticSearch cluster to aggregate operations logs, please re-install |
|
| 28 |
+or update logging and specify the appropriate switch to enable the ops cluster. |
|
| 29 |
+` |
|
| 30 |
+ |
|
| 31 |
+const deploymentConfigZeroPodsFound = ` |
|
| 32 |
+There were no Pods found that support logging. Try running |
|
| 33 |
+the following commands for additional information: |
|
| 34 |
+ |
|
| 35 |
+ oc describe dc -n %[1]s |
|
| 36 |
+ oc get events -n %[1]s |
|
| 37 |
+` |
|
| 38 |
+const deploymentConfigNoPodsFound = ` |
|
| 39 |
+There were no Pods found for DeploymentConfig '%[1]s'. Try running |
|
| 40 |
+the following commands for additional information: |
|
| 41 |
+ |
|
| 42 |
+ oc describe dc %[1]s -n %[2]s |
|
| 43 |
+ oc get events -n %[2]s |
|
| 44 |
+` |
|
| 45 |
+const deploymentConfigPodsNotRunning = ` |
|
| 46 |
+The Pod '%[1]s' matched by DeploymentConfig '%[2]s' is not in '%[3]s' status: %[4]s. |
|
| 47 |
+ |
|
| 48 |
+Depending upon the state, this could mean there is an error running the image |
|
| 49 |
+for one or more pod containers, the node could be pulling images, etc. Try running |
|
| 50 |
+the following commands for additional information: |
|
| 51 |
+ |
|
| 52 |
+ oc describe pod %[1]s -n %[5]s |
|
| 53 |
+ oc logs %[1]s -n %[5]s |
|
| 54 |
+ oc get events -n %[5]s |
|
| 55 |
+` |
|
| 56 |
+ |
|
| 57 |
+func checkDeploymentConfigs(r diagnosticReporter, adapter deploymentConfigAdapter, project string) {
|
|
| 58 |
+ req, _ := labels.NewRequirement(loggingInfraKey, selection.Exists, nil) |
|
| 59 |
+ selector := labels.NewSelector().Add(*req) |
|
| 60 |
+ r.Debug("AGL0040", fmt.Sprintf("Checking for DeploymentConfigs in project '%s' with selector '%s'", project, selector))
|
|
| 61 |
+ dcList, err := adapter.deploymentconfigs(project, kapi.ListOptions{LabelSelector: selector})
|
|
| 62 |
+ if err != nil {
|
|
| 63 |
+ r.Error("AGL0045", err, fmt.Sprintf("There was an error while trying to retrieve the DeploymentConfigs in project '%s': %s", project, err))
|
|
| 64 |
+ return |
|
| 65 |
+ } |
|
| 66 |
+ if len(dcList.Items) == 0 {
|
|
| 67 |
+ r.Error("AGL0047", nil, fmt.Sprintf("Did not find any matching DeploymentConfigs in project '%s' which means no logging components were deployed. Try running the installer.", project))
|
|
| 68 |
+ return |
|
| 69 |
+ } |
|
| 70 |
+ found := sets.NewString() |
|
| 71 |
+ for _, entry := range dcList.Items {
|
|
| 72 |
+ comp := labels.Set(entry.ObjectMeta.Labels).Get(componentKey) |
|
| 73 |
+ found.Insert(comp) |
|
| 74 |
+ r.Debug("AGL0050", fmt.Sprintf("Found DeploymentConfig '%s' for component '%s'", entry.ObjectMeta.Name, comp))
|
|
| 75 |
+ } |
|
| 76 |
+ for _, entry := range loggingComponents.List() {
|
|
| 77 |
+ exists := found.Has(entry) |
|
| 78 |
+ if !exists {
|
|
| 79 |
+ if strings.HasSuffix(entry, "-ops") {
|
|
| 80 |
+ r.Info("AGL0060", fmt.Sprintf(deploymentConfigWarnMissingForOps, entry))
|
|
| 81 |
+ } else {
|
|
| 82 |
+ r.Error("AGL0065", nil, fmt.Sprintf("Did not find a DeploymentConfig to support component '%s'", entry))
|
|
| 83 |
+ } |
|
| 84 |
+ } |
|
| 85 |
+ } |
|
| 86 |
+ checkDeploymentConfigPods(r, adapter, *dcList, project) |
|
| 87 |
+} |
|
| 88 |
+ |
|
| 89 |
+func checkDeploymentConfigPods(r diagnosticReporter, adapter deploymentConfigAdapter, dcs deployapi.DeploymentConfigList, project string) {
|
|
| 90 |
+ compReq, _ := labels.NewRequirement(componentKey, selection.In, loggingComponents) |
|
| 91 |
+ provReq, _ := labels.NewRequirement(providerKey, selection.Equals, sets.NewString(openshiftValue)) |
|
| 92 |
+ podSelector := labels.NewSelector().Add(*compReq, *provReq) |
|
| 93 |
+ r.Debug("AGL0070", fmt.Sprintf("Getting pods that match selector '%s'", podSelector))
|
|
| 94 |
+ podList, err := adapter.pods(project, kapi.ListOptions{LabelSelector: podSelector})
|
|
| 95 |
+ if err != nil {
|
|
| 96 |
+ r.Error("AGL0075", err, fmt.Sprintf("There was an error while trying to retrieve the pods for the AggregatedLogging stack: %s", err))
|
|
| 97 |
+ return |
|
| 98 |
+ } |
|
| 99 |
+ if len(podList.Items) == 0 {
|
|
| 100 |
+ r.Error("AGL0080", nil, fmt.Sprintf(deploymentConfigZeroPodsFound, project))
|
|
| 101 |
+ return |
|
| 102 |
+ } |
|
| 103 |
+ dcPodCount := make(map[string]int, len(dcs.Items)) |
|
| 104 |
+ for _, dc := range dcs.Items {
|
|
| 105 |
+ dcPodCount[dc.ObjectMeta.Name] = 0 |
|
| 106 |
+ } |
|
| 107 |
+ |
|
| 108 |
+ for _, pod := range podList.Items {
|
|
| 109 |
+ r.Debug("AGL0082", fmt.Sprintf("Checking status of Pod '%s'...", pod.ObjectMeta.Name))
|
|
| 110 |
+ dcName, hasDcName := pod.ObjectMeta.Annotations[deployapi.DeploymentConfigAnnotation] |
|
| 111 |
+ if !hasDcName {
|
|
| 112 |
+ r.Warn("AGL0085", nil, fmt.Sprintf("Found Pod '%s' that that does not reference a logging deployment config which may be acceptable. Skipping check to see if its running.", pod.ObjectMeta.Name))
|
|
| 113 |
+ continue |
|
| 114 |
+ } |
|
| 115 |
+ if pod.Status.Phase != kapi.PodRunning {
|
|
| 116 |
+ podName := pod.ObjectMeta.Name |
|
| 117 |
+ r.Error("AGL0090", nil, fmt.Sprintf(deploymentConfigPodsNotRunning, podName, dcName, kapi.PodRunning, pod.Status.Phase, project))
|
|
| 118 |
+ } |
|
| 119 |
+ count, _ := dcPodCount[dcName] |
|
| 120 |
+ dcPodCount[dcName] = count + 1 |
|
| 121 |
+ } |
|
| 122 |
+ for name, count := range dcPodCount {
|
|
| 123 |
+ if count == 0 {
|
|
| 124 |
+ r.Error("AGL0095", nil, fmt.Sprintf(deploymentConfigNoPodsFound, name, project))
|
|
| 125 |
+ } |
|
| 126 |
+ } |
|
| 127 |
+} |
| 0 | 128 |
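checkDeploymentConfigPods above builds its pod selector from two label requirements rather than a plain Set. A standalone sketch of that construction using the same labels/selection calls; the component list is trimmed for brevity and the label values are illustrative:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/pkg/selection"
	"k8s.io/kubernetes/pkg/util/sets"
)

func main() {
	// Pods must carry a recognized "component" label and "provider=openshift",
	// mirroring the compReq/provReq composition in the check above.
	compReq, _ := labels.NewRequirement("component", selection.In, sets.NewString("es", "kibana", "curator"))
	provReq, _ := labels.NewRequirement("provider", selection.Equals, sets.NewString("openshift"))
	selector := labels.NewSelector().Add(*compReq, *provReq)

	podLabels := labels.Set{"component": "kibana", "provider": "openshift"}
	fmt.Println("pod matches selector:", selector.Matches(podLabels)) // true
}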
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,153 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "testing" |
|
| 5 |
+ |
|
| 6 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 7 |
+ |
|
| 8 |
+ deployapi "github.com/openshift/origin/pkg/deploy/api" |
|
| 9 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 10 |
+) |
|
| 11 |
+ |
|
| 12 |
+const ( |
|
| 13 |
+ testDcPodsKey = "pods" |
|
| 14 |
+ testDcKey = "deploymentconfigs" |
|
| 15 |
+ testSkipAnnotation = "skipAddAnnoation" |
|
| 16 |
+) |
|
| 17 |
+ |
|
| 18 |
+type fakeDeploymentConfigsDiagnostic struct {
|
|
| 19 |
+ fakeDiagnostic |
|
| 20 |
+ fakePods kapi.PodList |
|
| 21 |
+ fakeDcs deployapi.DeploymentConfigList |
|
| 22 |
+ clienterrors map[string]error |
|
| 23 |
+} |
|
| 24 |
+ |
|
| 25 |
+func newFakeDeploymentConfigsDiagnostic(t *testing.T) *fakeDeploymentConfigsDiagnostic {
|
|
| 26 |
+ return &fakeDeploymentConfigsDiagnostic{
|
|
| 27 |
+ fakeDiagnostic: *newFakeDiagnostic(t), |
|
| 28 |
+ clienterrors: map[string]error{},
|
|
| 29 |
+ } |
|
| 30 |
+} |
|
| 31 |
+func (f *fakeDeploymentConfigsDiagnostic) addDeployConfigFor(component string) {
|
|
| 32 |
+ labels := map[string]string{componentKey: component}
|
|
| 33 |
+ dc := deployapi.DeploymentConfig{
|
|
| 34 |
+ ObjectMeta: kapi.ObjectMeta{
|
|
| 35 |
+ Name: component + "Name", |
|
| 36 |
+ Labels: labels, |
|
| 37 |
+ }, |
|
| 38 |
+ } |
|
| 39 |
+ f.fakeDcs.Items = append(f.fakeDcs.Items, dc) |
|
| 40 |
+} |
|
| 41 |
+ |
|
| 42 |
+func (f *fakeDeploymentConfigsDiagnostic) addPodFor(comp string, state kapi.PodPhase) {
|
|
| 43 |
+ annotations := map[string]string{}
|
|
| 44 |
+ if comp != testSkipAnnotation {
|
|
| 45 |
+ annotations[deployapi.DeploymentConfigAnnotation] = comp |
|
| 46 |
+ } |
|
| 47 |
+ pod := kapi.Pod{
|
|
| 48 |
+ ObjectMeta: kapi.ObjectMeta{
|
|
| 49 |
+ Name: comp, |
|
| 50 |
+ Annotations: annotations, |
|
| 51 |
+ }, |
|
| 52 |
+ Spec: kapi.PodSpec{},
|
|
| 53 |
+ Status: kapi.PodStatus{
|
|
| 54 |
+ Phase: state, |
|
| 55 |
+ }, |
|
| 56 |
+ } |
|
| 57 |
+ f.fakePods.Items = append(f.fakePods.Items, pod) |
|
| 58 |
+} |
|
| 59 |
+ |
|
| 60 |
+func (f *fakeDeploymentConfigsDiagnostic) deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) {
|
|
| 61 |
+ f.test.Logf(">> calling deploymentconfigs: %s", f.clienterrors)
|
|
| 62 |
+ value, ok := f.clienterrors[testDcKey] |
|
| 63 |
+ if ok {
|
|
| 64 |
+ f.test.Logf(">> error key found..returning %s", value)
|
|
| 65 |
+ return nil, value |
|
| 66 |
+ } |
|
| 67 |
+ f.test.Logf(">> error key not found..")
|
|
| 68 |
+ return &f.fakeDcs, nil |
|
| 69 |
+} |
|
| 70 |
+ |
|
| 71 |
+func (f *fakeDeploymentConfigsDiagnostic) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
|
|
| 72 |
+ value, ok := f.clienterrors[testDcPodsKey] |
|
| 73 |
+ if ok {
|
|
| 74 |
+ return nil, value |
|
| 75 |
+ } |
|
| 76 |
+ return &f.fakePods, nil |
|
| 77 |
+} |
|
| 78 |
+ |
|
| 79 |
+//test client error listing dcs |
|
| 80 |
+func TestCheckDcWhenErrorResponseFromClientRetrievingDc(t *testing.T) {
|
|
| 81 |
+ d := newFakeDeploymentConfigsDiagnostic(t) |
|
| 82 |
+ d.clienterrors[testDcKey] = errors.New("error")
|
|
| 83 |
+ |
|
| 84 |
+ checkDeploymentConfigs(d, d, fakeProject) |
|
| 85 |
+ |
|
| 86 |
+ d.assertMessage("AGL0045", "Exp. an error when client returns error retrieving dcs", log.ErrorLevel)
|
|
| 87 |
+ d.dumpMessages() |
|
| 88 |
+} |
|
| 89 |
+ |
|
| 90 |
+func TestCheckDcWhenNoDeployConfigsFound(t *testing.T) {
|
|
| 91 |
+ d := newFakeDeploymentConfigsDiagnostic(t) |
|
| 92 |
+ |
|
| 93 |
+ checkDeploymentConfigs(d, d, fakeProject) |
|
| 94 |
+ |
|
| 95 |
+ d.assertMessage("AGL0047", "Exp. an error when no DeploymentConfigs are found", log.ErrorLevel)
|
|
| 96 |
+ d.dumpMessages() |
|
| 97 |
+} |
|
| 98 |
+ |
|
| 99 |
+func TestCheckDcWhenOpsOrOtherDeployConfigsMissing(t *testing.T) {
|
|
| 100 |
+ d := newFakeDeploymentConfigsDiagnostic(t) |
|
| 101 |
+ d.addDeployConfigFor(componentNameEs) |
|
| 102 |
+ |
|
| 103 |
+ checkDeploymentConfigs(d, d, fakeProject) |
|
| 104 |
+ |
|
| 105 |
+ d.assertMessage("AGL0060", "Exp. a warning when ops DeploymentConfigs are missing", log.InfoLevel)
|
|
| 106 |
+ d.assertMessage("AGL0065", "Exp. an error when non-ops DeploymentConfigs are missing", log.ErrorLevel)
|
|
| 107 |
+ d.dumpMessages() |
|
| 108 |
+} |
|
| 109 |
+ |
|
| 110 |
+func TestCheckDcWhenClientErrorListingPods(t *testing.T) {
|
|
| 111 |
+ d := newFakeDeploymentConfigsDiagnostic(t) |
|
| 112 |
+ d.clienterrors[testDcPodsKey] = errors.New("New pod error")
|
|
| 113 |
+ for _, comp := range loggingComponents.List() {
|
|
| 114 |
+ d.addDeployConfigFor(comp) |
|
| 115 |
+ } |
|
| 116 |
+ |
|
| 117 |
+ checkDeploymentConfigs(d, d, fakeProject) |
|
| 118 |
+ |
|
| 119 |
+ d.assertMessage("AGL0075", "Exp. an error when retrieving pods errors", log.ErrorLevel)
|
|
| 120 |
+ d.dumpMessages() |
|
| 121 |
+} |
|
| 122 |
+ |
|
| 123 |
+func TestCheckDcWhenNoPodsFoundMatchingDeployConfig(t *testing.T) {
|
|
| 124 |
+ d := newFakeDeploymentConfigsDiagnostic(t) |
|
| 125 |
+ for _, comp := range loggingComponents.List() {
|
|
| 126 |
+ d.addDeployConfigFor(comp) |
|
| 127 |
+ } |
|
| 128 |
+ |
|
| 129 |
+ checkDeploymentConfigs(d, d, fakeProject) |
|
| 130 |
+ |
|
| 131 |
+ d.assertMessage("AGL0080", "Exp. an error when retrieving pods errors", log.ErrorLevel)
|
|
| 132 |
+ d.dumpMessages() |
|
| 133 |
+} |
|
| 134 |
+ |
|
| 135 |
+func TestCheckDcWhenInVariousStates(t *testing.T) {
|
|
| 136 |
+ d := newFakeDeploymentConfigsDiagnostic(t) |
|
| 137 |
+ for _, comp := range loggingComponents.List() {
|
|
| 138 |
+ d.addDeployConfigFor(comp) |
|
| 139 |
+ d.addPodFor(comp, kapi.PodRunning) |
|
| 140 |
+ } |
|
| 141 |
+ d.addPodFor(testSkipAnnotation, kapi.PodRunning) |
|
| 142 |
+ d.addPodFor("someothercomponent", kapi.PodPending)
|
|
| 143 |
+ d.addDeployConfigFor("somerandom component")
|
|
| 144 |
+ |
|
| 145 |
+ checkDeploymentConfigs(d, d, fakeProject) |
|
| 146 |
+ |
|
| 147 |
+ d.assertMessage("AGL0085", "Exp. a warning when pod is missing DeployConfig annotation", log.WarnLevel)
|
|
| 148 |
+ d.assertMessage("AGL0090", "Exp. an error when pod is not in running state", log.ErrorLevel)
|
|
| 149 |
+ d.assertMessage("AGL0095", "Exp. an error when pods not found for a DeployConfig", log.ErrorLevel)
|
|
| 150 |
+ |
|
| 151 |
+ d.dumpMessages() |
|
| 152 |
+} |
| 0 | 153 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,207 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "fmt" |
|
| 5 |
+ "net/url" |
|
| 6 |
+ |
|
| 7 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 8 |
+ kapisext "k8s.io/kubernetes/pkg/apis/extensions" |
|
| 9 |
+ kclient "k8s.io/kubernetes/pkg/client/unversioned" |
|
| 10 |
+ "k8s.io/kubernetes/pkg/labels" |
|
| 11 |
+ |
|
| 12 |
+ authapi "github.com/openshift/origin/pkg/authorization/api" |
|
| 13 |
+ "github.com/openshift/origin/pkg/client" |
|
| 14 |
+ configapi "github.com/openshift/origin/pkg/cmd/server/api" |
|
| 15 |
+ deployapi "github.com/openshift/origin/pkg/deploy/api" |
|
| 16 |
+ hostdiag "github.com/openshift/origin/pkg/diagnostics/host" |
|
| 17 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 18 |
+ routesapi "github.com/openshift/origin/pkg/route/api" |
|
| 19 |
+) |
|
| 20 |
+ |
|
| 21 |
+// AggregatedLogging is a Diagnostic to check the configurations |
|
| 22 |
+// and general integration of the OpenShift stack |
|
| 23 |
+// for aggregating container logs |
|
| 24 |
+// https://github.com/openshift/origin-aggregated-logging |
|
| 25 |
+type AggregatedLogging struct {
|
|
| 26 |
+ masterConfig *configapi.MasterConfig |
|
| 27 |
+ MasterConfigFile string |
|
| 28 |
+ OsClient *client.Client |
|
| 29 |
+ KubeClient *kclient.Client |
|
| 30 |
+ result types.DiagnosticResult |
|
| 31 |
+} |
|
| 32 |
+ |
|
| 33 |
+const ( |
|
| 34 |
+ AggregatedLoggingName = "AggregatedLogging" |
|
| 35 |
+ |
|
| 36 |
+ loggingInfraKey = "logging-infra" |
|
| 37 |
+ componentKey = "component" |
|
| 38 |
+ providerKey = "provider" |
|
| 39 |
+ openshiftValue = "openshift" |
|
| 40 |
+ |
|
| 41 |
+ fluentdServiceAccountName = "aggregated-logging-fluentd" |
|
| 42 |
+) |
|
| 43 |
+ |
|
| 44 |
+var loggingSelector = labels.Set{loggingInfraKey: "support"}
|
|
| 45 |
+ |
|
| 46 |
+//NewAggregatedLogging returns the AggregatedLogging Diagnostic |
|
| 47 |
+func NewAggregatedLogging(masterConfigFile string, kclient *kclient.Client, osclient *client.Client) *AggregatedLogging {
|
|
| 48 |
+ return &AggregatedLogging{nil, masterConfigFile, osclient, kclient, types.NewDiagnosticResult(AggregatedLoggingName)}
|
|
| 49 |
+} |
|
| 50 |
+ |
|
| 51 |
+func (d *AggregatedLogging) getScc(name string) (*kapi.SecurityContextConstraints, error) {
|
|
| 52 |
+ return d.KubeClient.SecurityContextConstraints().Get(name) |
|
| 53 |
+} |
|
| 54 |
+ |
|
| 55 |
+func (d *AggregatedLogging) getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) {
|
|
| 56 |
+ return d.OsClient.ClusterRoleBindings().Get(name) |
|
| 57 |
+} |
|
| 58 |
+ |
|
| 59 |
+func (d *AggregatedLogging) routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) {
|
|
| 60 |
+ return d.OsClient.Routes(project).List(options) |
|
| 61 |
+} |
|
| 62 |
+ |
|
| 63 |
+func (d *AggregatedLogging) serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) {
|
|
| 64 |
+ return d.KubeClient.ServiceAccounts(project).List(options) |
|
| 65 |
+} |
|
| 66 |
+ |
|
| 67 |
+func (d *AggregatedLogging) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
|
|
| 68 |
+ return d.KubeClient.Services(project).List(options) |
|
| 69 |
+} |
|
| 70 |
+ |
|
| 71 |
+func (d *AggregatedLogging) endpointsForService(project string, service string) (*kapi.Endpoints, error) {
|
|
| 72 |
+ return d.KubeClient.Endpoints(project).Get(service) |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+func (d *AggregatedLogging) daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) {
|
|
| 76 |
+ return d.KubeClient.DaemonSets(project).List(options)
|
|
| 77 |
+} |
|
| 78 |
+ |
|
| 79 |
+func (d *AggregatedLogging) nodes(options kapi.ListOptions) (*kapi.NodeList, error) {
|
|
| 80 |
+ return d.KubeClient.Nodes().List(options)
|
|
| 81 |
+} |
|
| 82 |
+ |
|
| 83 |
+func (d *AggregatedLogging) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
|
|
| 84 |
+ return d.KubeClient.Pods(project).List(options) |
|
| 85 |
+} |
|
| 86 |
+func (d *AggregatedLogging) deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) {
|
|
| 87 |
+ return d.OsClient.DeploymentConfigs(project).List(options) |
|
| 88 |
+} |
|
| 89 |
+ |
|
| 90 |
+func (d *AggregatedLogging) Info(id string, message string) {
|
|
| 91 |
+ d.result.Info(id, message) |
|
| 92 |
+} |
|
| 93 |
+ |
|
| 94 |
+func (d *AggregatedLogging) Error(id string, err error, message string) {
|
|
| 95 |
+ d.result.Error(id, err, message) |
|
| 96 |
+} |
|
| 97 |
+ |
|
| 98 |
+func (d *AggregatedLogging) Debug(id string, message string) {
|
|
| 99 |
+ d.result.Debug(id, message) |
|
| 100 |
+} |
|
| 101 |
+ |
|
| 102 |
+func (d *AggregatedLogging) Warn(id string, err error, message string) {
|
|
| 103 |
+ d.result.Warn(id, err, message) |
|
| 104 |
+} |
|
| 105 |
+ |
|
| 106 |
+func (d *AggregatedLogging) Name() string {
|
|
| 107 |
+ return AggregatedLoggingName |
|
| 108 |
+} |
|
| 109 |
+ |
|
| 110 |
+func (d *AggregatedLogging) Description() string {
|
|
| 111 |
+ return "Check aggregated logging integration for proper configuration" |
|
| 112 |
+} |
|
| 113 |
+ |
|
| 114 |
+func (d *AggregatedLogging) CanRun() (bool, error) {
|
|
| 115 |
+ if len(d.MasterConfigFile) == 0 {
|
|
| 116 |
+ return false, errors.New("No master config file was provided")
|
|
| 117 |
+ } |
|
| 118 |
+ if d.OsClient == nil {
|
|
| 119 |
+ return false, errors.New("Config must include a cluster-admin context to run this diagnostic")
|
|
| 120 |
+ } |
|
| 121 |
+ if d.KubeClient == nil {
|
|
| 122 |
+ return false, errors.New("Config must include a cluster-admin context to run this diagnostic")
|
|
| 123 |
+ } |
|
| 124 |
+ var err error |
|
| 125 |
+ d.masterConfig, err = hostdiag.GetMasterConfig(d.result, d.MasterConfigFile) |
|
| 126 |
+ if err != nil {
|
|
| 127 |
+ return false, errors.New("Unreadable master config; skipping this diagnostic.")
|
|
| 128 |
+ } |
|
| 129 |
+ return true, nil |
|
| 130 |
+} |
|
| 131 |
+ |
|
| 132 |
+func (d *AggregatedLogging) Check() types.DiagnosticResult {
|
|
| 133 |
+ project := retrieveLoggingProject(d.result, d.masterConfig, d.OsClient) |
|
| 134 |
+ if len(project) != 0 {
|
|
| 135 |
+ checkServiceAccounts(d, d, project) |
|
| 136 |
+ checkClusterRoleBindings(d, d, project) |
|
| 137 |
+ checkSccs(d, d, project) |
|
| 138 |
+ checkDeploymentConfigs(d, d, project) |
|
| 139 |
+ checkDaemonSets(d, d, project) |
|
| 140 |
+ checkServices(d, d, project) |
|
| 141 |
+ checkRoutes(d, d, project) |
|
| 142 |
+ checkKibana(d.result, d.OsClient, d.KubeClient, project) |
|
| 143 |
+ } |
|
| 144 |
+ return d.result |
|
| 145 |
+} |
|
| 146 |
+ |
|
| 147 |
+const projectNodeSelectorWarning = ` |
|
| 148 |
+The project '%[1]s' was found with a non-empty node selector annotation. This will keep |
|
| 149 |
+Fluentd from running on certain nodes and collecting logs from the entire cluster. You |
|
| 150 |
+can correct it by editing the project: |
|
| 151 |
+ |
|
| 152 |
+ oc edit namespace %[1]s |
|
| 153 |
+ |
|
| 154 |
+and updating the annotation: |
|
| 155 |
+ |
|
| 156 |
+ 'openshift.io/node-selector' : "" |
|
| 157 |
+ |
|
| 158 |
+` |
|
| 159 |
+ |
|
| 160 |
+func retrieveLoggingProject(r types.DiagnosticResult, masterCfg *configapi.MasterConfig, osClient *client.Client) string {
|
|
| 161 |
+ r.Debug("AGL0010", fmt.Sprintf("masterConfig.AssetConfig.LoggingPublicURL: '%s'", masterCfg.AssetConfig.LoggingPublicURL))
|
|
| 162 |
+ projectName := "" |
|
| 163 |
+ if len(masterCfg.AssetConfig.LoggingPublicURL) == 0 {
|
|
| 164 |
+ r.Debug("AGL0017", "masterConfig.AssetConfig.LoggingPublicURL is empty")
|
|
| 165 |
+ return projectName |
|
| 166 |
+ } |
|
| 167 |
+ |
|
| 168 |
+ loggingUrl, err := url.Parse(masterCfg.AssetConfig.LoggingPublicURL) |
|
| 169 |
+ if err != nil {
|
|
| 170 |
+ r.Error("AGL0011", err, fmt.Sprintf("Unable to parse the loggingPublicURL from the masterConfig '%s'", masterCfg.AssetConfig.LoggingPublicURL))
|
|
| 171 |
+ return projectName |
|
| 172 |
+ } |
|
| 173 |
+ |
|
| 174 |
+ routeList, err := osClient.Routes(kapi.NamespaceAll).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
|
|
| 175 |
+ if err != nil {
|
|
| 176 |
+ r.Error("AGL0012", err, fmt.Sprintf("There was an error while trying to find the route associated with '%s' which is probably transient: %s", loggingUrl, err))
|
|
| 177 |
+ return projectName |
|
| 178 |
+ } |
|
| 179 |
+ |
|
| 180 |
+ for _, route := range routeList.Items {
|
|
| 181 |
+ r.Debug("AGL0013", fmt.Sprintf("Comparing URL to route.Spec.Host: %s", route.Spec.Host))
|
|
| 182 |
+ if loggingUrl.Host == route.Spec.Host {
|
|
| 183 |
+ if len(projectName) == 0 {
|
|
| 184 |
+ projectName = route.ObjectMeta.Namespace |
|
| 185 |
+ r.Info("AGL0015", fmt.Sprintf("Found route '%s' matching logging URL '%s' in project: '%s'", route.ObjectMeta.Name, loggingUrl.Host, projectName))
|
|
| 186 |
+ } else {
|
|
| 187 |
+ r.Warn("AGL0019", nil, fmt.Sprintf("Found additional route '%s' matching logging URL '%s' in project: '%s'. This could mean you have multiple logging deployments.", route.ObjectMeta.Name, loggingUrl.Host, projectName))
|
|
| 188 |
+ } |
|
| 189 |
+ } |
|
| 190 |
+ } |
|
| 191 |
+ if len(projectName) == 0 {
|
|
| 192 |
+ message := fmt.Sprintf("Unable to find a route matching the loggingPublicURL defined in the master config '%s'. Check that the URL is correct and aggregated logging is deployed.", loggingUrl)
|
|
| 193 |
+ r.Error("AGL0014", errors.New(message), message)
|
|
| 194 |
+ return "" |
|
| 195 |
+ } |
|
| 196 |
+ project, err := osClient.Projects().Get(projectName) |
|
| 197 |
+ if err != nil {
|
|
| 198 |
+ r.Error("AGL0018", err, fmt.Sprintf("There was an error retrieving project '%s' which is most likely a transient error: %s", projectName, err))
|
|
| 199 |
+ return "" |
|
| 200 |
+ } |
|
| 201 |
+ nodeSelector, ok := project.ObjectMeta.Annotations["openshift.io/node-selector"] |
|
| 202 |
+ if ok && len(nodeSelector) != 0 {
|
|
| 203 |
+ r.Warn("AGL0030", nil, fmt.Sprintf(projectNodeSelectorWarning, projectName))
|
|
| 204 |
+ } |
|
| 205 |
+ return projectName |
|
| 206 |
+} |
| 0 | 207 |
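retrieveLoggingProject matches routes purely on the host portion of the parsed loggingPublicURL, so the scheme and any trailing path are ignored. A small standalone illustration; the URL and route host below are made up:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Only url.Host is compared against route.Spec.Host in the code above.
	loggingURL, err := url.Parse("https://kibana.example.com/")
	if err != nil {
		panic(err)
	}
	routeHost := "kibana.example.com" // hypothetical route host
	fmt.Println("matches route:", loggingURL.Host == routeHost) // true
}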
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,78 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 4 |
+ "testing" |
|
| 5 |
+) |
|
| 6 |
+ |
|
| 7 |
+const fakeProject = "someProject" |
|
| 8 |
+ |
|
| 9 |
+type fakeLogMessage struct {
|
|
| 10 |
+ id string |
|
| 11 |
+ logLevel log.Level |
|
| 12 |
+ message string |
|
| 13 |
+} |
|
| 14 |
+ |
|
| 15 |
+type fakeDiagnostic struct {
|
|
| 16 |
+ err error |
|
| 17 |
+ messages map[string]fakeLogMessage |
|
| 18 |
+ test *testing.T |
|
| 19 |
+} |
|
| 20 |
+ |
|
| 21 |
+func newFakeDiagnostic(t *testing.T) *fakeDiagnostic {
|
|
| 22 |
+ return &fakeDiagnostic{
|
|
| 23 |
+ messages: map[string]fakeLogMessage{},
|
|
| 24 |
+ test: t, |
|
| 25 |
+ } |
|
| 26 |
+} |
|
| 27 |
+ |
|
| 28 |
+func (f *fakeDiagnostic) dumpMessages() {
|
|
| 29 |
+ f.test.Log("<<<<<<<< Dumping test messages >>>>>>>>")
|
|
| 30 |
+ for id, message := range f.messages {
|
|
| 31 |
+ f.test.Logf("id: %s, logLevel: %s, message: %s", id, message.logLevel.Name, message.message)
|
|
| 32 |
+ } |
|
| 33 |
+} |
|
| 34 |
+ |
|
| 35 |
+func (f *fakeDiagnostic) Info(id string, message string) {
|
|
| 36 |
+ f.messages[id] = fakeLogMessage{id, log.InfoLevel, message}
|
|
| 37 |
+} |
|
| 38 |
+ |
|
| 39 |
+func (f *fakeDiagnostic) Error(id string, err error, message string) {
|
|
| 40 |
+ f.messages[id] = fakeLogMessage{id, log.ErrorLevel, message}
|
|
| 41 |
+} |
|
| 42 |
+ |
|
| 43 |
+func (f *fakeDiagnostic) Debug(id string, message string) {
|
|
| 44 |
+ f.messages[id] = fakeLogMessage{id, log.DebugLevel, message}
|
|
| 45 |
+} |
|
| 46 |
+ |
|
| 47 |
+func (f *fakeDiagnostic) Warn(id string, err error, message string) {
|
|
| 48 |
+ f.messages[id] = fakeLogMessage{id, log.WarnLevel, message}
|
|
| 49 |
+} |
|
| 50 |
+ |
|
| 51 |
+func (d *fakeDiagnostic) assertMessage(id string, missing string, level log.Level) {
|
|
| 52 |
+ message, ok := d.messages[id] |
|
| 53 |
+ if !ok {
|
|
| 54 |
+ d.test.Errorf("Unable to find message with id %s. %s", id, missing)
|
|
| 55 |
+ return |
|
| 56 |
+ } |
|
| 57 |
+ if message.logLevel != level {
|
|
| 58 |
+ d.test.Errorf("Exp logLevel %s for %s but got %s", level.Name, id, message.logLevel.Name)
|
|
| 59 |
+ } |
|
| 60 |
+} |
|
| 61 |
+ |
|
| 62 |
+func (d *fakeDiagnostic) assertNoWarnings() {
|
|
| 63 |
+ for _, message := range d.messages {
|
|
| 64 |
+ |
|
| 65 |
+ if message.logLevel == log.WarnLevel {
|
|
| 66 |
+ d.test.Errorf("Exp no WarnLevel log messages.")
|
|
| 67 |
+ } |
|
| 68 |
+ } |
|
| 69 |
+} |
|
| 70 |
+func (d *fakeDiagnostic) assertNoErrors() {
|
|
| 71 |
+ for _, message := range d.messages {
|
|
| 72 |
+ |
|
| 73 |
+ if message.logLevel == log.ErrorLevel {
|
|
| 74 |
+ d.test.Errorf("Exp no ErrorLevel log messages.")
|
|
| 75 |
+ } |
|
| 76 |
+ } |
|
| 77 |
+} |
| 0 | 78 |
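The fake above is the shared seam for the unit tests in this package: the checks write through the diagnosticReporter methods and the tests assert on the recorded messages. A hypothetical minimal test showing the intended usage, assuming it sits alongside the other tests in this package (the message ID is a placeholder):

package aggregated_logging

import (
	"testing"

	"github.com/openshift/origin/pkg/diagnostics/log"
)

// TestFakeDiagnosticUsageSketch is illustrative only: record a message through
// a reporter method, then assert on what was captured.
func TestFakeDiagnosticUsageSketch(t *testing.T) {
	d := newFakeDiagnostic(t)
	d.Warn("XYZ0001", nil, "something looks off")

	d.assertMessage("XYZ0001", "Exp. the warning recorded above", log.WarnLevel)
	d.assertNoErrors()
	d.dumpMessages()
}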
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,60 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 4 |
+ kapisext "k8s.io/kubernetes/pkg/apis/extensions" |
|
| 5 |
+ |
|
| 6 |
+ authapi "github.com/openshift/origin/pkg/authorization/api" |
|
| 7 |
+ deployapi "github.com/openshift/origin/pkg/deploy/api" |
|
| 8 |
+ routesapi "github.com/openshift/origin/pkg/route/api" |
|
| 9 |
+) |
|
| 10 |
+ |
|
| 11 |
+//diagnosticReporter provides diagnostic messages |
|
| 12 |
+type diagnosticReporter interface {
|
|
| 13 |
+ Info(id string, message string) |
|
| 14 |
+ Debug(id string, message string) |
|
| 15 |
+ Error(id string, err error, message string) |
|
| 16 |
+ Warn(id string, err error, message string) |
|
| 17 |
+} |
|
| 18 |
+ |
|
| 19 |
+type routesAdapter interface {
|
|
| 20 |
+ routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) |
|
| 21 |
+} |
|
| 22 |
+ |
|
| 23 |
+type sccAdapter interface {
|
|
| 24 |
+ getScc(name string) (*kapi.SecurityContextConstraints, error) |
|
| 25 |
+} |
|
| 26 |
+ |
|
| 27 |
+type clusterRoleBindingsAdapter interface {
|
|
| 28 |
+ getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) |
|
| 29 |
+} |
|
| 30 |
+ |
|
| 31 |
+//deploymentConfigAdapter is an abstraction to retrieve resource for validating dcs |
|
| 32 |
+//for aggregated logging diagnostics |
|
| 33 |
+type deploymentConfigAdapter interface {
|
|
| 34 |
+ deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) |
|
| 35 |
+ podsAdapter |
|
| 36 |
+} |
|
| 37 |
+ |
|
| 38 |
+//daemonsetAdapter is an abstraction to retrieve resources for validating daemonsets |
|
| 39 |
+//for aggregated logging diagnostics |
|
| 40 |
+type daemonsetAdapter interface {
|
|
| 41 |
+ daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) |
|
| 42 |
+ nodes(options kapi.ListOptions) (*kapi.NodeList, error) |
|
| 43 |
+ podsAdapter |
|
| 44 |
+} |
|
| 45 |
+ |
|
| 46 |
+type podsAdapter interface {
|
|
| 47 |
+ pods(project string, options kapi.ListOptions) (*kapi.PodList, error) |
|
| 48 |
+} |
|
| 49 |
+ |
|
| 50 |
+//saAdapter abstractions to retrieve service accounts |
|
| 51 |
+type saAdapter interface {
|
|
| 52 |
+ serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) |
|
| 53 |
+} |
|
| 54 |
+ |
|
| 55 |
+//servicesAdapter abstracts retrieving services |
|
| 56 |
+type servicesAdapter interface {
|
|
| 57 |
+ services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) |
|
| 58 |
+ endpointsForService(project string, serviceName string) (*kapi.Endpoints, error) |
|
| 59 |
+} |
| 0 | 60 |
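These small interfaces are what let the production AggregatedLogging object and the test fakes be passed interchangeably to the check functions (hence calls like checkDaemonSets(d, d, project)). Not part of this patch, but one idiomatic way to document that the concrete type satisfies them is a set of compile-time assertions, assuming the file lives in this package:

package aggregated_logging

// Illustrative compile-time assertions: each line fails to compile if
// *AggregatedLogging stops satisfying the corresponding adapter interface.
var (
	_ diagnosticReporter         = &AggregatedLogging{}
	_ clusterRoleBindingsAdapter = &AggregatedLogging{}
	_ sccAdapter                 = &AggregatedLogging{}
	_ saAdapter                  = &AggregatedLogging{}
	_ servicesAdapter            = &AggregatedLogging{}
	_ routesAdapter              = &AggregatedLogging{}
	_ daemonsetAdapter           = &AggregatedLogging{}
	_ deploymentConfigAdapter    = &AggregatedLogging{}
)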
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,98 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "fmt" |
|
| 5 |
+ "net/url" |
|
| 6 |
+ "strings" |
|
| 7 |
+ |
|
| 8 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 9 |
+ kclient "k8s.io/kubernetes/pkg/client/unversioned" |
|
| 10 |
+ "k8s.io/kubernetes/pkg/util/sets" |
|
| 11 |
+ |
|
| 12 |
+ "github.com/openshift/origin/pkg/client" |
|
| 13 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 14 |
+ oauthapi "github.com/openshift/origin/pkg/oauth/api" |
|
| 15 |
+) |
|
| 16 |
+ |
|
| 17 |
+const ( |
|
| 18 |
+ kibanaProxyOauthClientName = "kibana-proxy" |
|
| 19 |
+ kibanaProxySecretName = "logging-kibana-proxy" |
|
| 20 |
+ oauthSecretKeyName = "oauth-secret" |
|
| 21 |
+) |
|
| 22 |
+ |
|
| 23 |
+//checkKibana verifies the various integration points between Kibana and logging |
|
| 24 |
+func checkKibana(r types.DiagnosticResult, osClient *client.Client, kClient *kclient.Client, project string) {
|
|
| 25 |
+ oauthclient, err := osClient.OAuthClients().Get(kibanaProxyOauthClientName) |
|
| 26 |
+ if err != nil {
|
|
| 27 |
+ r.Error("AGL0115", err, fmt.Sprintf("Error retrieving the OauthClient '%s'. Unable to check Kibana", kibanaProxyOauthClientName))
|
|
| 28 |
+ return |
|
| 29 |
+ } |
|
| 30 |
+ checkKibanaSecret(r, osClient, kClient, project, oauthclient) |
|
| 31 |
+ checkKibanaRoutesInOauthClient(r, osClient, project, oauthclient) |
|
| 32 |
+} |
|
| 33 |
+ |
|
| 34 |
+//checkKibanaSecret confirms the secret used by kibana matches that configured in the oauth client |
|
| 35 |
+func checkKibanaSecret(r types.DiagnosticResult, osClient *client.Client, kClient *kclient.Client, project string, oauthclient *oauthapi.OAuthClient) {
|
|
| 36 |
+ r.Debug("AGL0100", "Checking oauthclient secrets...")
|
|
| 37 |
+ secret, err := kClient.Secrets(project).Get(kibanaProxySecretName) |
|
| 38 |
+ if err != nil {
|
|
| 39 |
+ r.Error("AGL0105", err, fmt.Sprintf("Error retrieving the secret '%s'", kibanaProxySecretName))
|
|
| 40 |
+ return |
|
| 41 |
+ } |
|
| 42 |
+ decoded, err := decodeSecret(secret, oauthSecretKeyName) |
|
| 43 |
+ if err != nil {
|
|
| 44 |
+ r.Error("AGL0110", err, "Unable to decode Kibana Secret")
|
|
| 45 |
+ return |
|
| 46 |
+ } |
|
| 47 |
+ if decoded != oauthclient.Secret {
|
|
| 48 |
+ r.Debug("AGL0120", fmt.Sprintf("OauthClient Secret: '%s'", oauthclient.Secret))
|
|
| 49 |
+ r.Debug("AGL0125", fmt.Sprintf("Decoded Kibana Secret: '%s'", decoded))
|
|
| 50 |
+ message := fmt.Sprintf("The %s OauthClient.Secret does not match the decoded oauth secret in '%s'", kibanaProxyOauthClientName, kibanaProxySecretName)
|
|
| 51 |
+ r.Error("AGL0130", errors.New(message), message)
|
|
| 52 |
+ } |
|
| 53 |
+} |
|
| 54 |
+ |
|
| 55 |
+//checkKibanaRoutesInOauthClient verifies the OAuth client contains the correct redirect URIs for the logging routes |
|
| 56 |
+func checkKibanaRoutesInOauthClient(r types.DiagnosticResult, osClient *client.Client, project string, oauthclient *oauthapi.OAuthClient) {
|
|
| 57 |
+ r.Debug("AGL0141", "Checking oauthclient redirectURIs for the logging routes...")
|
|
| 58 |
+ routeList, err := osClient.Routes(project).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
|
|
| 59 |
+ if err != nil {
|
|
| 60 |
+ r.Error("AGL0143", err, "Error retrieving the logging routes.")
|
|
| 61 |
+ return |
|
| 62 |
+ } |
|
| 63 |
+ redirectUris, err := parseRedirectUris(oauthclient.RedirectURIs) |
|
| 64 |
+ if err != nil {
|
|
| 65 |
+ r.Error("AGL0145", err, "Error parsing the OAuthClient.RedirectURIs")
|
|
| 66 |
+ return |
|
| 67 |
+ } |
|
| 68 |
+ for _, route := range routeList.Items {
|
|
| 69 |
+ if !redirectUris.Has(route.Spec.Host) {
|
|
| 70 |
+ message := fmt.Sprintf("OauthClient '%s' does not include a redirectURI for route '%s' which is '%s'", oauthclient.ObjectMeta.Name, route.ObjectMeta.Name, route.Spec.Host)
|
|
| 71 |
+ r.Error("AGL0147", errors.New(message), message)
|
|
| 72 |
+ } |
|
| 73 |
+ } |
|
| 74 |
+ |
|
| 75 |
+ return |
|
| 76 |
+} |
|
| 77 |
+ |
|
| 78 |
+func parseRedirectUris(uris []string) (sets.String, error) {
|
|
| 79 |
+ urls := sets.String{}
|
|
| 80 |
+ for _, uri := range uris {
|
|
| 81 |
+ url, err := url.Parse(uri) |
|
| 82 |
+ if err != nil {
|
|
| 83 |
+ return urls, err |
|
| 84 |
+ } |
|
| 85 |
+ urls.Insert(url.Host) |
|
| 86 |
+ } |
|
| 87 |
+ return urls, nil |
|
| 88 |
+} |
|
| 89 |
+ |
|
| 90 |
+// decodeSecret returns the value of the named data entry in a secret as a whitespace-trimmed string |
|
| 91 |
+func decodeSecret(secret *kapi.Secret, key string) (string, error) {
|
|
| 92 |
+ value, ok := secret.Data[key] |
|
| 93 |
+ if !ok {
|
|
| 94 |
+ return "", fmt.Errorf("The %s secret did not have a data entry for %s", secret.ObjectMeta.Name, key)
|
|
| 95 |
+ } |
|
| 96 |
+ return strings.TrimSpace(string(value)), nil |
|
| 97 |
+} |
| 0 | 98 |
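Note that parseRedirectUris keeps only the host portion of each redirect URI, so the comparison against route hosts ignores scheme and path. A small illustrative test of that behaviour (hypothetical, not part of this change; the hostnames are examples):

	package aggregated_logging

	import "testing"

	func TestParseRedirectUrisKeepsOnlyHosts(t *testing.T) {
		hosts, err := parseRedirectUris([]string{
			"https://kibana.example.com/auth/callback",
			"http://kibana-ops.example.com",
		})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		// Only the host portions should remain; scheme and path are dropped.
		if !hosts.Has("kibana.example.com") || !hosts.Has("kibana-ops.example.com") {
			t.Errorf("expected host-only entries, got: %v", hosts.List())
		}
	}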
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,95 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "crypto/tls" |
|
| 4 |
+ "crypto/x509" |
|
| 5 |
+ "encoding/pem" |
|
| 6 |
+ "errors" |
|
| 7 |
+ "fmt" |
|
| 8 |
+ |
|
| 9 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 10 |
+ |
|
| 11 |
+ routes "github.com/openshift/origin/pkg/route/api" |
|
| 12 |
+) |
|
| 13 |
+ |
|
| 14 |
+const routeUnaccepted = ` |
|
| 15 |
+An unaccepted route is most likely due to one of the following reasons: |
|
| 16 |
+ |
|
| 17 |
+* No router has been deployed to serve the route. |
|
| 18 |
+* Another route with the same host already exists. |
|
| 19 |
+ |
|
| 20 |
+If a router has been deployed, look for duplicate matching routes by |
|
| 21 |
+running the following: |
|
| 22 |
+ |
|
| 23 |
+ oc get --all-namespaces routes --template='{{range .items}}{{if eq .spec.host "%[2]s"}}{{println .metadata.name "in" .metadata.namespace}}{{end}}{{end}}'
|
|
| 24 |
+ |
|
| 25 |
+` |
|
| 26 |
+const routeCertMissingHostName = ` |
|
| 27 |
+Try updating the route certificate to include its host as either the CommonName (CN) or one of the alternate names. |
|
| 28 |
+` |
|
| 29 |
+ |
|
| 30 |
+//checkRoutes looks through the logging infra routes to see if they have been accepted and whether their TLS certificates are valid for the route host |
|
| 31 |
+func checkRoutes(r diagnosticReporter, adapter routesAdapter, project string) {
|
|
| 32 |
+ r.Debug("AGL0300", "Checking routes...")
|
|
| 33 |
+ routeList, err := adapter.routes(project, kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
|
|
| 34 |
+ if err != nil {
|
|
| 35 |
+ r.Error("AGL0305", err, fmt.Sprintf("There was an error retrieving routes in the project '%s' with selector '%s'", project, loggingSelector.AsSelector()))
|
|
| 36 |
+ return |
|
| 37 |
+ } |
|
| 38 |
+ if len(routeList.Items) == 0 {
|
|
| 39 |
+ r.Error("AGL0310", nil, fmt.Sprintf("There were no routes found to support logging in project '%s'", project))
|
|
| 40 |
+ return |
|
| 41 |
+ } |
|
| 42 |
+ for _, route := range routeList.Items {
|
|
| 43 |
+ if !wasAccepted(r, route) {
|
|
| 44 |
+ r.Error("AGL0325", nil, fmt.Sprintf("Route '%s' has not been accepted by any routers."+routeUnaccepted, route.ObjectMeta.Name, route.Spec.Host))
|
|
| 45 |
+ } |
|
| 46 |
+ if route.Spec.TLS != nil && len(route.Spec.TLS.Certificate) != 0 && len(route.Spec.TLS.Key) != 0 {
|
|
| 47 |
+ checkRouteCertificate(r, route) |
|
| 48 |
+ } else {
|
|
| 49 |
+ r.Debug("AGL0331", fmt.Sprintf("Skipping key and certificate checks on route '%s'. The TLS certificate, the key, or both are missing.", route.ObjectMeta.Name))
|
|
| 50 |
+ } |
|
| 51 |
+ } |
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+func checkRouteCertificate(r diagnosticReporter, route routes.Route) {
|
|
| 55 |
+ r.Debug("AGL0330", fmt.Sprintf("Checking certificate for route '%s'...", route.ObjectMeta.Name))
|
|
| 56 |
+ block, _ := pem.Decode([]byte(route.Spec.TLS.Certificate)) |
|
| 57 |
+ //verify hostname |
|
| 58 |
+ if block != nil {
|
|
| 59 |
+ cert, err := x509.ParseCertificate(block.Bytes) |
|
| 60 |
+ if err != nil {
|
|
| 61 |
+ r.Error("AGL0335", err, fmt.Sprintf("Unable to parse the certificate for route '%s'", route.ObjectMeta.Name))
|
|
| 62 |
+ return |
|
| 63 |
+ } |
|
| 64 |
+ r.Debug("AGL0340", fmt.Sprintf("Cert CommonName: '%s' Cert DNSNames: '%s'", cert.Subject.CommonName, cert.DNSNames))
|
|
| 65 |
+ if err := cert.VerifyHostname(route.Spec.Host); err != nil {
|
|
| 66 |
+ r.Error("AGL0345", err, fmt.Sprintf("Route '%[1]s' certificate does not include route host '%[2]s'"+routeCertMissingHostName, route.ObjectMeta.Name, route.Spec.Host))
|
|
| 67 |
+ } |
|
| 68 |
+ } else {
|
|
| 69 |
+ r.Error("AGL0350", errors.New("Unable to decode the TLS Certificate"), "Unable to decode the TLS Certificate")
|
|
| 70 |
+ } |
|
| 71 |
+ |
|
| 72 |
+ //verify key matches cert |
|
| 73 |
+ r.Debug("AGL0355", fmt.Sprintf("Checking certificate matches key for route '%s'", route.ObjectMeta.Name))
|
|
| 74 |
+ _, err := tls.X509KeyPair([]byte(route.Spec.TLS.Certificate), []byte(route.Spec.TLS.Key)) |
|
| 75 |
+ if err != nil {
|
|
| 76 |
+ r.Error("AGL0365", err, fmt.Sprintf("Route '%s' key and certificate do not match: %s. The router will be unable to pass traffic using this route.", route.ObjectMeta.Name, err))
|
|
| 77 |
+ } |
|
| 78 |
+} |
|
| 79 |
+ |
|
| 80 |
+func wasAccepted(r diagnosticReporter, route routes.Route) bool {
|
|
| 81 |
+ r.Debug("AGL0310", fmt.Sprintf("Checking if route '%s' was accepted...", route.ObjectMeta.Name))
|
|
| 82 |
+ accepted := 0 |
|
| 83 |
+ for _, status := range route.Status.Ingress {
|
|
| 84 |
+ r.Debug("AGL0315", fmt.Sprintf("Status for router: '%s', host: '%s'", status.RouterName, status.Host))
|
|
| 85 |
+ for _, condition := range status.Conditions {
|
|
| 86 |
+ r.Debug("AGL0320", fmt.Sprintf("Condition type: '%s' status: '%s'", condition.Type, condition.Status))
|
|
| 87 |
+ if condition.Type == routes.RouteAdmitted && condition.Status == kapi.ConditionTrue {
|
|
| 88 |
+ accepted = accepted + 1 |
|
| 89 |
+ } |
|
| 90 |
+ } |
|
| 91 |
+ } |
|
| 92 |
+ //Add check to compare acceptance to the number of available routers? |
|
| 93 |
+ return accepted > 0 |
|
| 94 |
+} |
| 0 | 95 |
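The certificate handling above reduces to three standard-library steps: pem.Decode the certificate, x509.ParseCertificate plus VerifyHostname against the route host, and tls.X509KeyPair to confirm the key matches. A standalone sketch of the same validation, independent of the route types (the helper name validateCertForHost and its error text are illustrative):

	package aggregated_logging

	import (
		"crypto/tls"
		"crypto/x509"
		"encoding/pem"
		"errors"
		"fmt"
	)

	// validateCertForHost (hypothetical) mirrors checkRouteCertificate: it confirms
	// the PEM certificate covers the given host and matches the private key.
	func validateCertForHost(certPEM, keyPEM, host string) error {
		block, _ := pem.Decode([]byte(certPEM))
		if block == nil {
			return errors.New("unable to decode the TLS certificate")
		}
		cert, err := x509.ParseCertificate(block.Bytes)
		if err != nil {
			return fmt.Errorf("unable to parse the certificate: %v", err)
		}
		if err := cert.VerifyHostname(host); err != nil {
			return fmt.Errorf("certificate does not include host '%s': %v", host, err)
		}
		if _, err := tls.X509KeyPair([]byte(certPEM), []byte(keyPEM)); err != nil {
			return fmt.Errorf("key and certificate do not match: %v", err)
		}
		return nil
	}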
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,115 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "testing" |
|
| 5 |
+ |
|
| 6 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 7 |
+ |
|
| 8 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 9 |
+ routesapi "github.com/openshift/origin/pkg/route/api" |
|
| 10 |
+) |
|
| 11 |
+ |
|
| 12 |
+const ( |
|
| 13 |
+ testRoutesKey = "routes" |
|
| 14 |
+) |
|
| 15 |
+ |
|
| 16 |
+type fakeRoutesDiagnostic struct {
|
|
| 17 |
+ fakeDiagnostic |
|
| 18 |
+ fakeRoutes routesapi.RouteList |
|
| 19 |
+ clienterrors map[string]error |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+func newFakeRoutesDiagnostic(t *testing.T) *fakeRoutesDiagnostic {
|
|
| 23 |
+ return &fakeRoutesDiagnostic{
|
|
| 24 |
+ fakeDiagnostic: *newFakeDiagnostic(t), |
|
| 25 |
+ clienterrors: map[string]error{},
|
|
| 26 |
+ } |
|
| 27 |
+} |
|
| 28 |
+ |
|
| 29 |
+func (f *fakeRoutesDiagnostic) addRouteWith(condType routesapi.RouteIngressConditionType, condStatus kapi.ConditionStatus, cert string, key string) {
|
|
| 30 |
+ ingress := routesapi.RouteIngress{
|
|
| 31 |
+ Conditions: []routesapi.RouteIngressCondition{
|
|
| 32 |
+ {
|
|
| 33 |
+ Type: condType, |
|
| 34 |
+ Status: condStatus, |
|
| 35 |
+ }, |
|
| 36 |
+ }, |
|
| 37 |
+ } |
|
| 38 |
+ route := routesapi.Route{
|
|
| 39 |
+ ObjectMeta: kapi.ObjectMeta{Name: "aname"},
|
|
| 40 |
+ Status: routesapi.RouteStatus{
|
|
| 41 |
+ Ingress: []routesapi.RouteIngress{ingress},
|
|
| 42 |
+ }, |
|
| 43 |
+ } |
|
| 44 |
+ if len(cert) != 0 && len(key) != 0 {
|
|
| 45 |
+ tls := routesapi.TLSConfig{
|
|
| 46 |
+ Certificate: cert, |
|
| 47 |
+ Key: key, |
|
| 48 |
+ } |
|
| 49 |
+ route.Spec.TLS = &tls |
|
| 50 |
+ } |
|
| 51 |
+ f.fakeRoutes.Items = append(f.fakeRoutes.Items, route) |
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+func (f *fakeRoutesDiagnostic) routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) {
|
|
| 55 |
+ value, ok := f.clienterrors[testRoutesKey] |
|
| 56 |
+ if ok {
|
|
| 57 |
+ return nil, value |
|
| 58 |
+ } |
|
| 59 |
+ return &f.fakeRoutes, nil |
|
| 60 |
+} |
|
| 61 |
+ |
|
| 62 |
+func TestRouteWhenErrorFromClient(t *testing.T) {
|
|
| 63 |
+ d := newFakeRoutesDiagnostic(t) |
|
| 64 |
+ d.clienterrors[testRoutesKey] = errors.New("some client error")
|
|
| 65 |
+ |
|
| 66 |
+ checkRoutes(d, d, fakeProject) |
|
| 67 |
+ d.assertMessage("AGL0305", "Exp an error when there is a client error retrieving routes", log.ErrorLevel)
|
|
| 68 |
+ d.dumpMessages() |
|
| 69 |
+} |
|
| 70 |
+ |
|
| 71 |
+func TestRouteWhenZeroRoutesAvailable(t *testing.T) {
|
|
| 72 |
+ d := newFakeRoutesDiagnostic(t) |
|
| 73 |
+ |
|
| 74 |
+ checkRoutes(d, d, fakeProject) |
|
| 75 |
+ d.assertMessage("AGL0310", "Exp an error when there are no routes to support logging", log.ErrorLevel)
|
|
| 76 |
+ d.dumpMessages() |
|
| 77 |
+} |
|
| 78 |
+ |
|
| 79 |
+//test error when the route has not been accepted |
|
| 80 |
+func TestRouteWhenRouteNotAccepted(t *testing.T) {
|
|
| 81 |
+ d := newFakeRoutesDiagnostic(t) |
|
| 82 |
+ d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "", "") |
|
| 83 |
+ |
|
| 84 |
+ checkRoutes(d, d, fakeProject) |
|
| 85 |
+ d.assertMessage("AGL0325", "Exp an error when a route was not accepted", log.ErrorLevel)
|
|
| 86 |
+ d.assertMessage("AGL0331", "Exp to skip the cert check since none specified", log.DebugLevel)
|
|
| 87 |
+ d.dumpMessages() |
|
| 88 |
+} |
|
| 89 |
+func TestRouteWhenRouteAccepted(t *testing.T) {
|
|
| 90 |
+ d := newFakeRoutesDiagnostic(t) |
|
| 91 |
+ d.addRouteWith(routesapi.RouteAdmitted, kapi.ConditionTrue, "", "") |
|
| 92 |
+ |
|
| 93 |
+ checkRoutes(d, d, fakeProject) |
|
| 94 |
+ d.assertNoErrors() |
|
| 95 |
+ d.dumpMessages() |
|
| 96 |
+} |
|
| 97 |
+ |
|
| 98 |
+func TestRouteWhenErrorDecodingCert(t *testing.T) {
|
|
| 99 |
+ d := newFakeRoutesDiagnostic(t) |
|
| 100 |
+ d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "cert", "key") |
|
| 101 |
+ |
|
| 102 |
+ checkRoutes(d, d, fakeProject) |
|
| 103 |
+ d.assertMessage("AGL0350", "Exp an error when unable to decode cert", log.ErrorLevel)
|
|
| 104 |
+ d.dumpMessages() |
|
| 105 |
+} |
|
| 106 |
+ |
|
| 107 |
+func TestRouteWhenErrorParsingCert(t *testing.T) {
|
|
| 108 |
+ d := newFakeRoutesDiagnostic(t) |
|
| 109 |
+ d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "cert", "key") |
|
| 110 |
+ |
|
| 111 |
+ checkRoutes(d, d, fakeProject) |
|
| 112 |
+ d.assertMessage("AGL0350", "Exp an error when unable to decode cert", log.ErrorLevel)
|
|
| 113 |
+ d.dumpMessages() |
|
| 114 |
+} |
| 0 | 115 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,37 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ |
|
| 5 |
+ "k8s.io/kubernetes/pkg/util/sets" |
|
| 6 |
+) |
|
| 7 |
+ |
|
| 8 |
+const sccPrivilegedName = "privileged" |
|
| 9 |
+ |
|
| 10 |
+var sccPrivilegedNames = sets.NewString(fluentdServiceAccountName) |
|
| 11 |
+ |
|
| 12 |
+const sccPrivilegedUnboundServiceAccount = ` |
|
| 13 |
+The ServiceAccount '%[1]s' does not have a privileged SecurityContextConstraint for project '%[2]s'. As a |
|
| 14 |
+user with a cluster-admin role, you can grant the permissions by running |
|
| 15 |
+the following: |
|
| 16 |
+ |
|
| 17 |
+ oadm policy add-scc-to-user privileged system:serviceaccount:%[2]s:%[1]s |
|
| 18 |
+` |
|
| 19 |
+ |
|
| 20 |
+func checkSccs(r diagnosticReporter, adapter sccAdapter, project string) {
|
|
| 21 |
+ r.Debug("AGL0700", "Checking SecurityContextConstraints...")
|
|
| 22 |
+ scc, err := adapter.getScc(sccPrivilegedName) |
|
| 23 |
+ if err != nil {
|
|
| 24 |
+ r.Error("AGL0705", err, fmt.Sprintf("There was an error while trying to retrieve the SecurityContextConstraints for the logging stack: %s", err))
|
|
| 25 |
+ return |
|
| 26 |
+ } |
|
| 27 |
+ privilegedUsers := sets.NewString() |
|
| 28 |
+ for _, user := range scc.Users {
|
|
| 29 |
+ privilegedUsers.Insert(user) |
|
| 30 |
+ } |
|
| 31 |
+ for _, name := range sccPrivilegedNames.List() {
|
|
| 32 |
+ if !privilegedUsers.Has(fmt.Sprintf("system:serviceaccount:%s:%s", project, name)) {
|
|
| 33 |
+ r.Error("AGL0710", nil, fmt.Sprintf(sccPrivilegedUnboundServiceAccount, name, project))
|
|
| 34 |
+ } |
|
| 35 |
+ } |
|
| 36 |
+} |
| 0 | 37 |
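The check above hinges on the convention that an SCC grants access to a service account through a user entry of the form system:serviceaccount:&lt;project&gt;:&lt;name&gt;. A minimal sketch of that membership test for a single service account (the helper name hasPrivilegedServiceAccount is hypothetical):

	package aggregated_logging

	import (
		"fmt"

		"k8s.io/kubernetes/pkg/util/sets"
	)

	// hasPrivilegedServiceAccount (hypothetical) reports whether the SCC user list
	// already contains the fully qualified service account entry checkSccs expects.
	func hasPrivilegedServiceAccount(sccUsers []string, project, name string) bool {
		users := sets.NewString(sccUsers...)
		return users.Has(fmt.Sprintf("system:serviceaccount:%s:%s", project, name))
	}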
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,65 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "encoding/json" |
|
| 4 |
+ "errors" |
|
| 5 |
+ "fmt" |
|
| 6 |
+ "testing" |
|
| 7 |
+ |
|
| 8 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 9 |
+ |
|
| 10 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 11 |
+) |
|
| 12 |
+ |
|
| 13 |
+type fakeSccDiagnostic struct {
|
|
| 14 |
+ fakeDiagnostic |
|
| 15 |
+ fakeScc kapi.SecurityContextConstraints |
|
| 16 |
+} |
|
| 17 |
+ |
|
| 18 |
+func newFakeSccDiagnostic(t *testing.T) *fakeSccDiagnostic {
|
|
| 19 |
+ return &fakeSccDiagnostic{
|
|
| 20 |
+ fakeDiagnostic: *newFakeDiagnostic(t), |
|
| 21 |
+ } |
|
| 22 |
+} |
|
| 23 |
+ |
|
| 24 |
+func (f *fakeSccDiagnostic) getScc(name string) (*kapi.SecurityContextConstraints, error) {
|
|
| 25 |
+ json, _ := json.Marshal(f.fakeScc) |
|
| 26 |
+ f.test.Logf(">> test#getScc(%s), err: %s, scc:%s", name, f.err, string(json))
|
|
| 27 |
+ if f.err != nil {
|
|
| 28 |
+ return nil, f.err |
|
| 29 |
+ } |
|
| 30 |
+ return &f.fakeScc, nil |
|
| 31 |
+} |
|
| 32 |
+ |
|
| 33 |
+func (f *fakeSccDiagnostic) addSccFor(name string, project string) {
|
|
| 34 |
+ f.fakeScc.Users = append(f.fakeScc.Users, fmt.Sprintf("system:serviceaccount:%s:%s", project, name))
|
|
| 35 |
+} |
|
| 36 |
+ |
|
| 37 |
+func TestCheckSccWhenClientReturnsError(t *testing.T) {
|
|
| 38 |
+ d := newFakeSccDiagnostic(t) |
|
| 39 |
+ d.err = errors.New("client error")
|
|
| 40 |
+ |
|
| 41 |
+ checkSccs(d, d, fakeProject) |
|
| 42 |
+ |
|
| 43 |
+ d.assertMessage("AGL0705", "Exp error when client returns error getting SCC", log.ErrorLevel)
|
|
| 44 |
+ d.dumpMessages() |
|
| 45 |
+} |
|
| 46 |
+ |
|
| 47 |
+func TestCheckSccWhenMissingPrivilegedUsers(t *testing.T) {
|
|
| 48 |
+ d := newFakeSccDiagnostic(t) |
|
| 49 |
+ |
|
| 50 |
+ checkSccs(d, d, fakeProject) |
|
| 51 |
+ |
|
| 52 |
+ d.assertMessage("AGL0710", "Exp error when SCC is missing exp service account", log.ErrorLevel)
|
|
| 53 |
+ d.dumpMessages() |
|
| 54 |
+} |
|
| 55 |
+ |
|
| 56 |
+func TestCheckSccWhenEverythingExists(t *testing.T) {
|
|
| 57 |
+ d := newFakeSccDiagnostic(t) |
|
| 58 |
+ d.addSccFor(fluentdServiceAccountName, fakeProject) |
|
| 59 |
+ |
|
| 60 |
+ checkSccs(d, d, fakeProject) |
|
| 61 |
+ |
|
| 62 |
+ d.assertNoErrors() |
|
| 63 |
+ d.dumpMessages() |
|
| 64 |
+} |
| 0 | 65 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,38 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "strings" |
|
| 5 |
+ |
|
| 6 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 7 |
+ "k8s.io/kubernetes/pkg/util/sets" |
|
| 8 |
+) |
|
| 9 |
+ |
|
| 10 |
+var serviceAccountNames = sets.NewString("logging-deployer", "aggregated-logging-kibana", "aggregated-logging-curator", "aggregated-logging-elasticsearch", fluentdServiceAccountName)
|
|
| 11 |
+ |
|
| 12 |
+const serviceAccountsMissing = ` |
|
| 13 |
+Did not find ServiceAccounts: %s. The logging infrastructure will not function |
|
| 14 |
+properly without them. You may need to re-run the installer. |
|
| 15 |
+` |
|
| 16 |
+ |
|
| 17 |
+func checkServiceAccounts(d diagnosticReporter, f saAdapter, project string) {
|
|
| 18 |
+ d.Debug("AGL0500", fmt.Sprintf("Checking ServiceAccounts in project '%s'...", project))
|
|
| 19 |
+ saList, err := f.serviceAccounts(project, kapi.ListOptions{})
|
|
| 20 |
+ if err != nil {
|
|
| 21 |
+ d.Error("AGL0505", err, fmt.Sprintf("There was an error while trying to retrieve the service accounts for the AggregatedLogging stack: %s", err))
|
|
| 22 |
+ return |
|
| 23 |
+ } |
|
| 24 |
+ foundNames := sets.NewString() |
|
| 25 |
+ for _, sa := range saList.Items {
|
|
| 26 |
+ foundNames.Insert(sa.ObjectMeta.Name) |
|
| 27 |
+ } |
|
| 28 |
+ missing := sets.NewString() |
|
| 29 |
+ for _, name := range serviceAccountNames.List() {
|
|
| 30 |
+ if !foundNames.Has(name) {
|
|
| 31 |
+ missing.Insert(name) |
|
| 32 |
+ } |
|
| 33 |
+ } |
|
| 34 |
+ if missing.Len() != 0 {
|
|
| 35 |
+ d.Error("AGL0515", nil, fmt.Sprintf(serviceAccountsMissing, strings.Join(missing.List(), ",")))
|
|
| 36 |
+ } |
|
| 37 |
+} |
| 0 | 38 |
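The missing-account computation above is written as an explicit loop; assuming the sets.String type used here provides the usual Difference method, the same result can be expressed as a set difference. A behaviour-equivalent sketch (the helper name is hypothetical, not part of this change):

	package aggregated_logging

	import "k8s.io/kubernetes/pkg/util/sets"

	// missingServiceAccounts (hypothetical) returns the expected ServiceAccount
	// names that were not found in the project.
	func missingServiceAccounts(expected, found sets.String) sets.String {
		return expected.Difference(found)
	}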
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,64 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "testing" |
|
| 5 |
+ |
|
| 6 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 7 |
+ |
|
| 8 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 9 |
+) |
|
| 10 |
+ |
|
| 11 |
+type mockServiceAccountDiagnostic struct {
|
|
| 12 |
+ accounts kapi.ServiceAccountList |
|
| 13 |
+ fakeDiagnostic |
|
| 14 |
+} |
|
| 15 |
+ |
|
| 16 |
+func newMockServiceAccountDiagnostic(t *testing.T) *mockServiceAccountDiagnostic {
|
|
| 17 |
+ return &mockServiceAccountDiagnostic{
|
|
| 18 |
+ accounts: kapi.ServiceAccountList{},
|
|
| 19 |
+ fakeDiagnostic: *newFakeDiagnostic(t), |
|
| 20 |
+ } |
|
| 21 |
+} |
|
| 22 |
+ |
|
| 23 |
+func (m *mockServiceAccountDiagnostic) serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) {
|
|
| 24 |
+ if m.err != nil {
|
|
| 25 |
+ return &m.accounts, m.err |
|
| 26 |
+ } |
|
| 27 |
+ return &m.accounts, nil |
|
| 28 |
+} |
|
| 29 |
+ |
|
| 30 |
+func (d *mockServiceAccountDiagnostic) addServiceAccountNamed(name string) {
|
|
| 31 |
+ meta := kapi.ObjectMeta{Name: name}
|
|
| 32 |
+ d.accounts.Items = append(d.accounts.Items, kapi.ServiceAccount{ObjectMeta: meta})
|
|
| 33 |
+} |
|
| 34 |
+ |
|
| 35 |
+func TestCheckingServiceAccountsWhenFailedResponseFromClient(t *testing.T) {
|
|
| 36 |
+ d := newMockServiceAccountDiagnostic(t) |
|
| 37 |
+ d.err = errors.New("Some Error")
|
|
| 38 |
+ checkServiceAccounts(d, d, fakeProject) |
|
| 39 |
+ d.assertMessage("AGL0505",
|
|
| 40 |
+ "Exp an error when unable to retrieve service accounts because of a client error", |
|
| 41 |
+ log.ErrorLevel) |
|
| 42 |
+} |
|
| 43 |
+ |
|
| 44 |
+func TestCheckingServiceAccountsWhenMissingExpectedServiceAccount(t *testing.T) {
|
|
| 45 |
+ d := newMockServiceAccountDiagnostic(t) |
|
| 46 |
+ d.addServiceAccountNamed("foobar")
|
|
| 47 |
+ |
|
| 48 |
+ checkServiceAccounts(d, d, fakeProject) |
|
| 49 |
+ d.assertMessage("AGL0515",
|
|
| 50 |
+ "Exp an error when an expected service account was not found.", |
|
| 51 |
+ log.ErrorLevel) |
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+func TestCheckingServiceAccountsIsOk(t *testing.T) {
|
|
| 55 |
+ d := newMockServiceAccountDiagnostic(t) |
|
| 56 |
+ |
|
| 57 |
+ for _, name := range serviceAccountNames.List() {
|
|
| 58 |
+ d.addServiceAccountNamed(name) |
|
| 59 |
+ } |
|
| 60 |
+ |
|
| 61 |
+ checkServiceAccounts(d, d, fakeProject) |
|
| 62 |
+ d.assertNoErrors() |
|
| 63 |
+} |
| 0 | 64 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,61 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "strings" |
|
| 5 |
+ |
|
| 6 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 7 |
+ "k8s.io/kubernetes/pkg/util/sets" |
|
| 8 |
+) |
|
| 9 |
+ |
|
| 10 |
+var loggingServices = sets.NewString("logging-es", "logging-es-cluster", "logging-es-ops", "logging-es-ops-cluster", "logging-kibana", "logging-kibana-ops")
|
|
| 11 |
+ |
|
| 12 |
+const serviceNotFound = ` |
|
| 13 |
+Expected to find '%s' among the logging services for the project but did not. |
|
| 14 |
+` |
|
| 15 |
+const serviceOpsNotFound = ` |
|
| 16 |
+Expected to find '%s' among the logging services for the project but did not. This |
|
| 17 |
+may not matter if you chose not to install a separate logging stack to support operations. |
|
| 18 |
+` |
|
| 19 |
+ |
|
| 20 |
+// checkServices looks to see if the aggregated logging services exist |
|
| 21 |
+func checkServices(r diagnosticReporter, adapter servicesAdapter, project string) {
|
|
| 22 |
+ r.Debug("AGL0200", fmt.Sprintf("Checking for services in project '%s' with selector '%s'", project, loggingSelector.AsSelector()))
|
|
| 23 |
+ serviceList, err := adapter.services(project, kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
|
|
| 24 |
+ if err != nil {
|
|
| 25 |
+ r.Error("AGL0205", err, fmt.Sprintf("There was an error while trying to retrieve the logging services: %s", err))
|
|
| 26 |
+ return |
|
| 27 |
+ } |
|
| 28 |
+ foundServices := sets.NewString() |
|
| 29 |
+ for _, service := range serviceList.Items {
|
|
| 30 |
+ foundServices.Insert(service.ObjectMeta.Name) |
|
| 31 |
+ r.Debug("AGL0210", fmt.Sprintf("Retrieved service '%s'", service.ObjectMeta.Name))
|
|
| 32 |
+ } |
|
| 33 |
+ for _, service := range loggingServices.List() {
|
|
| 34 |
+ if foundServices.Has(service) {
|
|
| 35 |
+ checkServiceEndpoints(r, adapter, project, service) |
|
| 36 |
+ } else {
|
|
| 37 |
+ if strings.Contains(service, "-ops") {
|
|
| 38 |
+ r.Warn("AGL0215", nil, fmt.Sprintf(serviceOpsNotFound, service))
|
|
| 39 |
+ } else {
|
|
| 40 |
+ r.Error("AGL0217", nil, fmt.Sprintf(serviceNotFound, service))
|
|
| 41 |
+ } |
|
| 42 |
+ } |
|
| 43 |
+ } |
|
| 44 |
+} |
|
| 45 |
+ |
|
| 46 |
+// checkServiceEndpoints validates if there is an available endpoint for the service. |
|
| 47 |
+func checkServiceEndpoints(r diagnosticReporter, adapter servicesAdapter, project string, service string) {
|
|
| 48 |
+ endpoints, err := adapter.endpointsForService(project, service) |
|
| 49 |
+ if err != nil {
|
|
| 50 |
+ r.Warn("AGL0220", err, fmt.Sprintf("Unable to retrieve endpoints for service '%s': %s", service, err))
|
|
| 51 |
+ return |
|
| 52 |
+ } |
|
| 53 |
+ if len(endpoints.Subsets) == 0 {
|
|
| 54 |
+ if strings.Contains(service, "-ops") {
|
|
| 55 |
+ r.Info("AGL0223", fmt.Sprintf("There are no endpoints found for service '%s'. This could mean you chose not to install a separate operations cluster during installation.", service))
|
|
| 56 |
+ } else {
|
|
| 57 |
+ r.Warn("AGL0225", nil, fmt.Sprintf("There are no endpoints found for service '%s'. This means there are no pods backing this service and this component is not functioning.", service))
|
|
| 58 |
+ } |
|
| 59 |
+ } |
|
| 60 |
+} |
| 0 | 61 |
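A concrete servicesAdapter would typically delegate straight to the Kubernetes client. A hedged sketch, assuming the unversioned client exposes Services(namespace).List and Endpoints(namespace).Get (those calls are not shown in this change, so treat the wiring as illustrative):

	package aggregated_logging

	import (
		kapi "k8s.io/kubernetes/pkg/api"
		kclient "k8s.io/kubernetes/pkg/client/unversioned"
	)

	// clientServicesAdapter (hypothetical) backs servicesAdapter with the real
	// Kubernetes client for the logging project.
	type clientServicesAdapter struct {
		kClient *kclient.Client
	}

	func (a *clientServicesAdapter) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
		return a.kClient.Services(project).List(options)
	}

	func (a *clientServicesAdapter) endpointsForService(project string, serviceName string) (*kapi.Endpoints, error) {
		return a.kClient.Endpoints(project).Get(serviceName)
	}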
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,104 @@ |
| 0 |
+package aggregated_logging |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "testing" |
|
| 5 |
+ |
|
| 6 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 7 |
+ kapi "k8s.io/kubernetes/pkg/api" |
|
| 8 |
+) |
|
| 9 |
+ |
|
| 10 |
+type fakeServicesDiagnostic struct {
|
|
| 11 |
+ list kapi.ServiceList |
|
| 12 |
+ fakeDiagnostic |
|
| 13 |
+ endpoints map[string]kapi.Endpoints |
|
| 14 |
+ endpointErr error |
|
| 15 |
+} |
|
| 16 |
+ |
|
| 17 |
+func newFakeServicesDiagnostic(t *testing.T) *fakeServicesDiagnostic {
|
|
| 18 |
+ return &fakeServicesDiagnostic{
|
|
| 19 |
+ list: kapi.ServiceList{},
|
|
| 20 |
+ fakeDiagnostic: *newFakeDiagnostic(t), |
|
| 21 |
+ endpoints: map[string]kapi.Endpoints{},
|
|
| 22 |
+ } |
|
| 23 |
+} |
|
| 24 |
+ |
|
| 25 |
+func (f *fakeServicesDiagnostic) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
|
|
| 26 |
+ if f.err != nil {
|
|
| 27 |
+ return &f.list, f.err |
|
| 28 |
+ } |
|
| 29 |
+ return &f.list, nil |
|
| 30 |
+} |
|
| 31 |
+func (f *fakeServicesDiagnostic) endpointsForService(project string, service string) (*kapi.Endpoints, error) {
|
|
| 32 |
+ if f.endpointErr != nil {
|
|
| 33 |
+ return nil, f.endpointErr |
|
| 34 |
+ } |
|
| 35 |
+ endpoints, _ := f.endpoints[service] |
|
| 36 |
+ return &endpoints, nil |
|
| 37 |
+} |
|
| 38 |
+ |
|
| 39 |
+func (f *fakeServicesDiagnostic) addEndpointSubsetTo(service string) {
|
|
| 40 |
+ endpoints := kapi.Endpoints{}
|
|
| 41 |
+ endpoints.Subsets = []kapi.EndpointSubset{{}}
|
|
| 42 |
+ f.endpoints[service] = endpoints |
|
| 43 |
+} |
|
| 44 |
+ |
|
| 45 |
+func (f *fakeServicesDiagnostic) addServiceNamed(name string) {
|
|
| 46 |
+ meta := kapi.ObjectMeta{Name: name}
|
|
| 47 |
+ f.list.Items = append(f.list.Items, kapi.Service{ObjectMeta: meta})
|
|
| 48 |
+} |
|
| 49 |
+ |
|
| 50 |
+// test error from client |
|
| 51 |
+func TestCheckingServicesWhenFailedResponseFromClient(t *testing.T) {
|
|
| 52 |
+ d := newFakeServicesDiagnostic(t) |
|
| 53 |
+ d.err = errors.New("an error")
|
|
| 54 |
+ checkServices(d, d, fakeProject) |
|
| 55 |
+ d.assertMessage("AGL0205",
|
|
| 56 |
+ "Exp an error when unable to retrieve services because of a client error", |
|
| 57 |
+ log.ErrorLevel) |
|
| 58 |
+} |
|
| 59 |
+ |
|
| 60 |
+func TestCheckingServicesWhenMissingServices(t *testing.T) {
|
|
| 61 |
+ d := newFakeServicesDiagnostic(t) |
|
| 62 |
+ d.addServiceNamed("logging-es")
|
|
| 63 |
+ |
|
| 64 |
+ checkServices(d, d, fakeProject) |
|
| 65 |
+ d.assertMessage("AGL0215",
|
|
| 66 |
+ "Exp a warning when an expected service is not found", |
|
| 67 |
+ log.WarnLevel) |
|
| 68 |
+} |
|
| 69 |
+ |
|
| 70 |
+func TestCheckingServicesWarnsWhenRetrievingEndpointsErrors(t *testing.T) {
|
|
| 71 |
+ d := newFakeServicesDiagnostic(t) |
|
| 72 |
+ d.addServiceNamed("logging-es")
|
|
| 73 |
+ d.endpointErr = errors.New("an endpoint error")
|
|
| 74 |
+ |
|
| 75 |
+ checkServices(d, d, fakeProject) |
|
| 76 |
+ d.assertMessage("AGL0220",
|
|
| 77 |
+ "Exp a warning when there is an error retrieving endpoints for a service", |
|
| 78 |
+ log.WarnLevel) |
|
| 79 |
+} |
|
| 80 |
+ |
|
| 81 |
+func TestCheckingServicesWarnsWhenServiceHasNoEndpoints(t *testing.T) {
|
|
| 82 |
+ d := newFakeServicesDiagnostic(t) |
|
| 83 |
+ for _, service := range loggingServices.List() {
|
|
| 84 |
+ d.addServiceNamed(service) |
|
| 85 |
+ } |
|
| 86 |
+ |
|
| 87 |
+ checkServices(d, d, fakeProject) |
|
| 88 |
+ d.assertMessage("AGL0225",
|
|
| 89 |
+ "Exp a warning when an expected service has no endpoints", |
|
| 90 |
+ log.WarnLevel) |
|
| 91 |
+} |
|
| 92 |
+ |
|
| 93 |
+func TestCheckingServicesHasNoErrorsOrWarningsForExpServices(t *testing.T) {
|
|
| 94 |
+ d := newFakeServicesDiagnostic(t) |
|
| 95 |
+ for _, service := range loggingServices.List() {
|
|
| 96 |
+ d.addServiceNamed(service) |
|
| 97 |
+ d.addEndpointSubsetTo(service) |
|
| 98 |
+ } |
|
| 99 |
+ |
|
| 100 |
+ checkServices(d, d, fakeProject) |
|
| 101 |
+ d.assertNoErrors() |
|
| 102 |
+ d.assertNoWarnings() |
|
| 103 |
+} |