
diagnostics: complete refactor

Master/node/client subcommands went away in favor of client/cluster/host diagnostic builders.
Diagnostic logging was completely refactored.
Diagnostics now return a result object instead of logging
directly, so that they can be run in parallel (though they don't yet).
Updated the help accordingly.

Luke Meyer authored on 2015/07/20 18:25:08
Showing 35 changed files
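
For orientation, a minimal sketch of the result-object pattern this commit introduces, reconstructed from how the code in the diffs below calls it (Name/Description/CanRun/Check on diagnostics; Logs/Warnings/Errors on results). The real definitions live in pkg/diagnostics/types and pkg/diagnostics/log, so the exact shapes here are assumptions, not the commit's actual source:

    // Sketch only: inferred from call sites in this diff, not copied from
    // pkg/diagnostics/types.
    package types

    // LogEntry stands in for whatever pkg/diagnostics/log passes to
    // Logger.LogEntry; its concrete type is not shown in this diff.
    type LogEntry interface{}

    // DiagnosticError matches the []types.DiagnosticError values that
    // detectClientConfig returns; its real definition is also not shown.
    type DiagnosticError error

    // Diagnostic is what the client/cluster/host builders produce and what
    // DiagnosticsOptions.Run iterates over.
    type Diagnostic interface {
        Name() string
        Description() string
        CanRun() (canRun bool, reason error)
        Check() DiagnosticResult
    }

    // DiagnosticResult carries a diagnostic's findings back to the caller
    // instead of writing to a shared logger, which is what would let
    // diagnostics run in parallel later.
    type DiagnosticResult interface {
        Logs() []LogEntry
        Warnings() []DiagnosticError
        Errors() []DiagnosticError
    }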
... ...
@@ -2,135 +2,44 @@ package diagnostics
 
 import (
 	"fmt"
-	"io"
-	"os"
 
-	"github.com/spf13/cobra"
-
-	kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
-	kclientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
-	kcmdutil "github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util"
+	clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
 
-	diagnosticflags "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
-	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
 	clientdiagnostics "github.com/openshift/origin/pkg/diagnostics/client"
-	"github.com/openshift/origin/pkg/diagnostics/log"
-	diagnostictypes "github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
+	"github.com/openshift/origin/pkg/diagnostics/types"
 )
 
-const ClientDiagnosticsRecommendedName = "client"
+const (
+	ConfigContexts = "ConfigContexts"
+)
 
 var (
-	AvailableClientDiagnostics = util.NewStringSet("ConfigContexts", "NodeDefinitions")
+	AvailableClientDiagnostics = util.NewStringSet(ConfigContexts) // add more diagnostics as they are defined
 )
 
-// user options for openshift-diagnostics client command
-type ClientDiagnosticsOptions struct {
-	RequestedDiagnostics util.StringList
-
-	KubeClient *kclient.Client
-	KubeConfig *kclientcmdapi.Config
-
-	LogOptions *log.LoggerOptions
-	Logger     *log.Logger
-}
+func (o DiagnosticsOptions) buildClientDiagnostics(rawConfig *clientcmdapi.Config) ([]types.Diagnostic, bool /* ok */, error) {
 
-const longClientDescription = `
-OpenShift Diagnostics
-
-This command helps you understand and troubleshoot OpenShift as a user. It is
-intended to be run from the same context as an OpenShift client
-("openshift cli" or "osc") and with the same configuration options.
-
-    $ %s
-`
-
-func NewClientCommand(name string, fullName string, out io.Writer) *cobra.Command {
-	o := &ClientDiagnosticsOptions{
-		RequestedDiagnostics: AvailableClientDiagnostics.List(),
-		LogOptions:           &log.LoggerOptions{Out: out},
+	osClient, kubeClient, clientErr := o.Factory.Clients()
+	_ = osClient   // remove once a diagnostic makes use of OpenShift client
+	_ = kubeClient // remove once a diagnostic makes use of kube client
+	if clientErr != nil {
+		o.Logger.Notice("clLoadDefaultFailed", "Failed creating client from config; client diagnostics will be limited to config testing")
+		AvailableClientDiagnostics = util.NewStringSet(ConfigContexts)
 	}
 
-	var factory *osclientcmd.Factory
-
-	cmd := &cobra.Command{
-		Use:   name,
-		Short: "Troubleshoot using the OpenShift v3 client.",
-		Long:  fmt.Sprintf(longClientDescription, fullName),
-		Run: func(c *cobra.Command, args []string) {
-			kcmdutil.CheckErr(o.Complete())
-
-			_, kubeClient, err := factory.Clients()
-			kcmdutil.CheckErr(err)
-
-			kubeConfig, err := factory.OpenShiftClientConfig.RawConfig()
-			kcmdutil.CheckErr(err)
-
-			o.KubeClient = kubeClient
-			o.KubeConfig = &kubeConfig
-
-			failed, err := o.RunDiagnostics()
-			o.Logger.Summary()
-			o.Logger.Finish()
-
-			kcmdutil.CheckErr(err)
-			if failed {
-				os.Exit(255)
-			}
-
-		},
-	}
-	cmd.SetOutput(out)                     // for output re: usage / help
-	factory = osclientcmd.New(cmd.Flags()) // side effect: add standard persistent flags for openshift client
-	diagnosticflags.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, diagnosticflags.RecommendedLoggerOptionFlags())
-	diagnosticflags.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, diagnosticflags.NewRecommendedDiagnosticFlag())
-
-	return cmd
-}
-
-func (o *ClientDiagnosticsOptions) Complete() error {
-	var err error
-	o.Logger, err = o.LogOptions.NewLogger()
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func (o ClientDiagnosticsOptions) RunDiagnostics() (bool, error) {
-	diagnostics := map[string]diagnostictypes.Diagnostic{}
-
-	for _, diagnosticName := range o.RequestedDiagnostics {
+	diagnostics := []types.Diagnostic{}
+	requestedDiagnostics := intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableClientDiagnostics).List()
+	for _, diagnosticName := range requestedDiagnostics {
 		switch diagnosticName {
-		case "ConfigContexts":
-			for contextName, _ := range o.KubeConfig.Contexts {
-				diagnostics[diagnosticName+"["+contextName+"]"] = clientdiagnostics.ConfigContext{o.KubeConfig, contextName, o.Logger}
+		case ConfigContexts:
+			for contextName := range rawConfig.Contexts {
+				diagnostics = append(diagnostics, clientdiagnostics.ConfigContext{rawConfig, contextName})
 			}
 
-		case "NodeDefinitions":
-			diagnostics[diagnosticName] = clientdiagnostics.NodeDefinition{o.KubeClient, o.Logger}
-
 		default:
-			return false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
-		}
-	}
-
-	for name, diagnostic := range diagnostics {
-
-		if canRun, reason := diagnostic.CanRun(); !canRun {
-			if reason == nil {
-				o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "client", "name": name, "diag": diagnostic.Description()}})
-			} else {
-				o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}", TemplateData: map[string]string{"area": "client", "name": name, "diag": diagnostic.Description(), "reason": reason.Error()}})
-			}
-			continue
+			return nil, false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
 		}
-
-		o.Logger.Noticem(log.Message{ID: "diagRun", Template: "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "client", "name": name, "diag": diagnostic.Description()}})
-		diagnostic.Check()
 	}
-
-	return o.Logger.ErrorsSeen(), nil
+	return diagnostics, true, clientErr
 }
new file mode 100644
... ...
@@ -0,0 +1,111 @@
+package diagnostics
+
+import (
+	"fmt"
+	"strings"
+
+	kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
+	clientcmd "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd"
+	clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+
+	authorizationapi "github.com/openshift/origin/pkg/authorization/api"
+	"github.com/openshift/origin/pkg/client"
+	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
+
+	clustdiags "github.com/openshift/origin/pkg/diagnostics/cluster"
+	"github.com/openshift/origin/pkg/diagnostics/types"
+)
+
+var (
+	AvailableClusterDiagnostics = util.NewStringSet("NodeDefinitions")
+)
+
+func (o DiagnosticsOptions) buildClusterDiagnostics(rawConfig *clientcmdapi.Config) ([]types.Diagnostic, bool /* ok */, error) {
+	requestedDiagnostics := intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableClusterDiagnostics).List()
+	if len(requestedDiagnostics) == 0 { // no diagnostics to run here
+		return nil, true, nil // don't waste time on discovery
+	}
+
+	var clusterClient *client.Client
+	var kclusterClient *kclient.Client
+
+	clusterClient, kclusterClient, found, err := o.findClusterClients(rawConfig)
+	if !found {
+		o.Logger.Notice("noClustCtx", "No cluster-admin client config found; skipping cluster diagnostics.")
+		return nil, false, err
+	}
+
+	diagnostics := []types.Diagnostic{}
+	for _, diagnosticName := range requestedDiagnostics {
+		switch diagnosticName {
+		case "NodeDefinitions":
+			diagnostics = append(diagnostics, clustdiags.NodeDefinitions{kclusterClient})
+
+		default:
+			return nil, false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
+		}
+	}
+	return diagnostics, true, nil
+}
+
+func (o DiagnosticsOptions) findClusterClients(rawConfig *clientcmdapi.Config) (*client.Client, *kclient.Client, bool, error) {
+	if o.ClientClusterContext != "" { // user has specified cluster context to use
+		if context, exists := rawConfig.Contexts[o.ClientClusterContext]; !exists {
+			configErr := fmt.Errorf("Specified '%s' as cluster-admin context, but it was not found in your client configuration.", o.ClientClusterContext)
+			o.Logger.Error("discClustCtx", configErr.Error())
+			return nil, nil, false, configErr
+		} else if os, kube, found, err := o.makeClusterClients(rawConfig, o.ClientClusterContext, context); found {
+			return os, kube, true, err
+		} else {
+			return nil, nil, false, err
+		}
+	}
+	currentContext, exists := rawConfig.Contexts[rawConfig.CurrentContext]
+	if !exists { // config specified cluster admin context that doesn't exist; complain and quit
+		configErr := fmt.Errorf("Current context '%s' not found in client configuration; will not attempt cluster diagnostics.", rawConfig.CurrentContext)
+		o.Logger.Errorf("discClustCtx", configErr.Error())
+		return nil, nil, false, configErr
+	}
+	// check if current context is already cluster admin
+	if os, kube, found, err := o.makeClusterClients(rawConfig, rawConfig.CurrentContext, currentContext); found {
+		return os, kube, true, err
+	}
+	// otherwise, for convenience, search for a context with the same server but with the system:admin user
+	for name, context := range rawConfig.Contexts {
+		if context.Cluster == currentContext.Cluster && name != rawConfig.CurrentContext && strings.HasPrefix(context.AuthInfo, "system:admin/") {
+			if os, kube, found, err := o.makeClusterClients(rawConfig, name, context); found {
+				return os, kube, true, err
+			} else {
+				return nil, nil, false, err // don't try more than one such context, they'll probably fail the same
+			}
+		}
+	}
+	return nil, nil, false, nil
+}
+
+func (o DiagnosticsOptions) makeClusterClients(rawConfig *clientcmdapi.Config, contextName string, context *clientcmdapi.Context) (*client.Client, *kclient.Client, bool, error) {
+	overrides := &clientcmd.ConfigOverrides{Context: *context}
+	clientConfig := clientcmd.NewDefaultClientConfig(*rawConfig, overrides)
+	factory := osclientcmd.NewFactory(clientConfig)
+	o.Logger.Debugf("discClustCtxStart", "Checking if context is cluster-admin: '%s'", contextName)
+	if osClient, kubeClient, err := factory.Clients(); err != nil {
+		o.Logger.Debugf("discClustCtx", "Error creating client for context '%s':\n%v", contextName, err)
+		return nil, nil, false, nil
+	} else {
+		subjectAccessReview := authorizationapi.SubjectAccessReview{
+			// we assume if you can list nodes, you're the cluster admin.
+			Verb:     "list",
+			Resource: "nodes",
+		}
+		if resp, err := osClient.SubjectAccessReviews("default").Create(&subjectAccessReview); err != nil {
+			o.Logger.Errorf("discClustCtx", "Error testing cluster-admin access for context '%s':\n%v", contextName, err)
+			return nil, nil, false, err
+		} else if resp.Allowed {
+			o.Logger.Infof("discClustCtxFound", "Using context for cluster-admin access: '%s'", contextName)
+			return osClient, kubeClient, true, nil
+		}
+	}
+	o.Logger.Debugf("discClustCtx", "Context does not have cluster-admin access: '%s'", contextName)
+	return nil, nil, false, nil
+}
new file mode 100644
... ...
@@ -0,0 +1,29 @@
+package diagnostics
+
+import (
+	clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
+	"github.com/openshift/origin/pkg/cmd/cli/config"
+
+	clientdiagnostics "github.com/openshift/origin/pkg/diagnostics/client"
+	"github.com/openshift/origin/pkg/diagnostics/log"
+	"github.com/openshift/origin/pkg/diagnostics/types"
+)
+
+func (o DiagnosticsOptions) detectClientConfig() (bool, []types.DiagnosticError, []types.DiagnosticError) {
+	diagnostic := &clientdiagnostics.ConfigLoading{ConfFlagName: config.OpenShiftConfigFlagName, ClientFlags: o.ClientFlags}
+	o.Logger.Noticet("diagRun", "Determining if client configuration exists for client/cluster diagnostics",
+		log.Hash{"area": "client", "name": diagnostic.Name(), "diag": diagnostic.Description()})
+	result := diagnostic.Check()
+	for _, entry := range result.Logs() {
+		o.Logger.LogEntry(entry)
+	}
+	return diagnostic.SuccessfulLoad(), result.Warnings(), result.Errors()
+}
+
+func (o DiagnosticsOptions) buildRawConfig() (*clientcmdapi.Config, error) {
+	kubeConfig, configErr := o.Factory.OpenShiftClientConfig.RawConfig()
+	if len(kubeConfig.Contexts) == 0 {
+		return nil, configErr
+	}
+	return &kubeConfig, configErr
+}
... ...
@@ -2,89 +2,80 @@ package diagnostics
 
 import (
 	"fmt"
+	"github.com/spf13/cobra"
+	flag "github.com/spf13/pflag"
 	"io"
 	"os"
 
-	"github.com/spf13/cobra"
-
 	kcmdutil "github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
 	kutilerrors "github.com/GoogleCloudPlatform/kubernetes/pkg/util/errors"
-
-	diagnosticflags "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
-	"github.com/openshift/origin/pkg/cmd/templates"
+	"github.com/openshift/origin/pkg/cmd/cli/config"
+	"github.com/openshift/origin/pkg/cmd/flagtypes"
 	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
-	"github.com/openshift/origin/pkg/diagnostics/log"
-)
 
-var (
-	AvailableOverallDiagnostics = util.NewStringSet()
+	"github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
+	"github.com/openshift/origin/pkg/diagnostics/log"
+	"github.com/openshift/origin/pkg/diagnostics/types"
 )
 
-func init() {
-	AvailableOverallDiagnostics.Insert(AvailableClientDiagnostics.List()...)
-	AvailableOverallDiagnostics.Insert(AvailableMasterDiagnostics.List()...)
-	AvailableOverallDiagnostics.Insert(AvailableNodeDiagnostics.List()...)
-}
-
-type OverallDiagnosticsOptions struct {
+type DiagnosticsOptions struct {
 	RequestedDiagnostics util.StringList
 
 	MasterConfigLocation string
 	NodeConfigLocation   string
+	ClientClusterContext string
+	IsHost               bool
 
-	Factory *osclientcmd.Factory
+	ClientFlags *flag.FlagSet
+	Factory     *osclientcmd.Factory
 
 	LogOptions *log.LoggerOptions
 	Logger     *log.Logger
 }
 
-const longAllDescription = `
+const longDescription = `
 OpenShift Diagnostics
 
-This command helps you understand and troubleshoot OpenShift. It is
-intended to be run from the same context as an OpenShift client or running
-master / node in order to troubleshoot from the perspective of each.
+This command helps you understand and troubleshoot OpenShift. It runs
+diagnostics against an OpenShift cluster as a client, and/or against the
+state of a running master / node host.
 
     $ %[1]s
 
-If run without flags or subcommands, it will check for config files for
-client, master, and node, and if found, use them for troubleshooting
-those components. If master/node config files are not found, the tool
-assumes they are not present and does diagnostics only as a client.
-
-You may also specify config files explicitly with flags below, in which
-case you will receive an error if they are invalid or not found.
+If run without flags, it will check for standard config files for
+client, master, and node, and if found, use them for diagnostics.
+You may also specify config files explicitly with flags, in which case
+you will receive an error if they are not found. For example:
 
     $ %[1]s --master-config=/etc/openshift/master/master-config.yaml
 
-Subcommands may be used to scope the troubleshooting to a particular
-component and are not limited to using config files; you can and should
-use the same flags that are actually set on the command line for that
-component to configure the diagnostic.
+* If master/node config files are not found and the --host flag is not
+  present, host diagnostics are skipped.
+* If the client has cluster-admin access, this enables cluster
+  diagnostics that regular users cannot run.
+* If a client config file is not found, client and cluster diagnostics
+  are skipped.
 
-    $ %[1]s node --hostname='node.example.com' --kubeconfig=...
-
-NOTE: This is an alpha version of diagnostics and will change significantly.
-NOTE: Global flags (from the 'options' subcommand) are ignored here but
-can be used with subcommands.
+NOTE: This is a beta version of diagnostics and may still evolve in a
+different direction.
 `
 
 func NewCommandDiagnostics(name string, fullName string, out io.Writer) *cobra.Command {
-	o := &OverallDiagnosticsOptions{
-		RequestedDiagnostics: AvailableOverallDiagnostics.List(),
+	o := &DiagnosticsOptions{
+		RequestedDiagnostics: util.StringList{},
 		LogOptions:           &log.LoggerOptions{Out: out},
 	}
 
 	cmd := &cobra.Command{
 		Use:   name,
 		Short: "This utility helps you understand and troubleshoot OpenShift v3.",
-		Long:  fmt.Sprintf(longAllDescription, fullName),
+		Long:  fmt.Sprintf(longDescription, fullName),
 		Run: func(c *cobra.Command, args []string) {
 			kcmdutil.CheckErr(o.Complete())
 
-			failed, err := o.RunDiagnostics()
-			o.Logger.Summary()
+			failed, err, warnCount, errorCount := o.RunDiagnostics()
+			o.Logger.Summary(warnCount, errorCount)
 			o.Logger.Finish()
 
 			kcmdutil.CheckErr(err)
... ...
@@ -96,21 +87,22 @@ func NewCommandDiagnostics(name string, fullName string, out io.Writer) *cobra.C
 	}
 	cmd.SetOutput(out) // for output re: usage / help
 
-	o.Factory = osclientcmd.New(cmd.Flags()) // side effect: add standard persistent flags for openshift client
-	cmd.Flags().StringVar(&o.MasterConfigLocation, "master-config", "", "path to master config file")
-	cmd.Flags().StringVar(&o.NodeConfigLocation, "node-config", "", "path to node config file")
-	diagnosticflags.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, diagnosticflags.RecommendedLoggerOptionFlags())
-	diagnosticflags.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, diagnosticflags.NewRecommendedDiagnosticFlag())
-
-	cmd.AddCommand(NewClientCommand(ClientDiagnosticsRecommendedName, name+" "+ClientDiagnosticsRecommendedName, out))
-	cmd.AddCommand(NewMasterCommand(MasterDiagnosticsRecommendedName, name+" "+MasterDiagnosticsRecommendedName, out))
-	cmd.AddCommand(NewNodeCommand(NodeDiagnosticsRecommendedName, name+" "+NodeDiagnosticsRecommendedName, out))
-	cmd.AddCommand(NewOptionsCommand())
+	o.ClientFlags = flag.NewFlagSet("client", flag.ContinueOnError) // hide the extensive set of client flags
+	o.Factory = osclientcmd.New(o.ClientFlags)                      // that would otherwise be added to this command
+	cmd.Flags().AddFlag(o.ClientFlags.Lookup(config.OpenShiftConfigFlagName))
+	cmd.Flags().AddFlag(o.ClientFlags.Lookup("context")) // TODO: find k8s constant
+	cmd.Flags().StringVar(&o.ClientClusterContext, options.FlagClusterContextName, "", "client context to use for cluster administrator")
+	cmd.Flags().StringVar(&o.MasterConfigLocation, options.FlagMasterConfigName, "", "path to master config file (implies --host)")
+	cmd.Flags().StringVar(&o.NodeConfigLocation, options.FlagNodeConfigName, "", "path to node config file (implies --host)")
+	cmd.Flags().BoolVar(&o.IsHost, options.FlagIsHostName, false, "look for systemd and journald units even without master/node config")
+	flagtypes.GLog(cmd.Flags())
+	options.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, options.RecommendedLoggerOptionFlags())
+	options.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, options.NewRecommendedDiagnosticFlag())
 
 	return cmd
 }
 
-func (o *OverallDiagnosticsOptions) Complete() error {
+func (o *DiagnosticsOptions) Complete() error {
 	var err error
 	o.Logger, err = o.LogOptions.NewLogger()
 	if err != nil {
... ...
@@ -120,112 +112,129 @@ func (o *OverallDiagnosticsOptions) Complete() error {
 	return nil
 }
 
-func (o OverallDiagnosticsOptions) RunDiagnostics() (bool, error) {
+func (o DiagnosticsOptions) RunDiagnostics() (bool, error, int, int) {
 	failed := false
+	warnings := []error{}
 	errors := []error{}
-
-	masterFailed, err := o.CheckMaster()
-	failed = failed && masterFailed
-	if err != nil {
-		errors = append(errors, err)
-	}
-
-	nodeFailed, err := o.CheckNode()
-	failed = failed && nodeFailed
-	if err != nil {
-		errors = append(errors, err)
-	}
-
-	clientFailed, err := o.CheckClient()
-	failed = failed && clientFailed
-	if err != nil {
-		errors = append(errors, err)
-	}
-
-	return failed, kutilerrors.NewAggregate(errors)
-}
-
-func (o OverallDiagnosticsOptions) CheckClient() (bool, error) {
-	runClientChecks := true
-
-	_, kubeClient, err := o.Factory.Clients()
-	if err != nil {
-		runClientChecks = false
+	diagnostics := map[string][]types.Diagnostic{}
+	AvailableDiagnostics := util.NewStringSet()
+	AvailableDiagnostics.Insert(AvailableClientDiagnostics.List()...)
+	AvailableDiagnostics.Insert(AvailableClusterDiagnostics.List()...)
+	AvailableDiagnostics.Insert(AvailableHostDiagnostics.List()...)
+	if len(o.RequestedDiagnostics) == 0 {
+		o.RequestedDiagnostics = AvailableDiagnostics.List()
+	} else if common := intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableDiagnostics); len(common) == 0 {
+		o.Logger.Errort("emptyReqDiag", "None of the requested diagnostics are available:\n  {{.requested}}\nPlease try from the following:\n  {{.available}}",
+			log.Hash{"requested": o.RequestedDiagnostics, "available": AvailableDiagnostics.List()})
+		return false, fmt.Errorf("No requested diagnostics available"), 0, 1
+	} else if len(common) < len(o.RequestedDiagnostics) {
+		errors = append(errors, fmt.Errorf("Not all requested diagnostics are available"))
+		o.Logger.Errort("notAllReqDiag", `
+Of the requested diagnostics:
+    {{.requested}}
+only these are available:
+    {{.common}}
+The list of all possible is:
+    {{.available}}
+		`, log.Hash{"requested": o.RequestedDiagnostics, "common": common.List(), "available": AvailableDiagnostics.List()})
 	}
 
-	kubeConfig, err := o.Factory.OpenShiftClientConfig.RawConfig()
-	if err != nil {
-		runClientChecks = false
-	}
-
-	if runClientChecks {
-		clientDiagnosticOptions := &ClientDiagnosticsOptions{
-			RequestedDiagnostics: intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableClientDiagnostics).List(),
-			KubeClient:           kubeClient,
-			KubeConfig:           &kubeConfig,
-			LogOptions:           o.LogOptions,
-			Logger:               o.Logger,
+	func() { // don't trust discovery/build of diagnostics; wrap panic nicely in case of developer error
+		defer func() {
+			if r := recover(); r != nil {
+				failed = true
+				errors = append(errors, fmt.Errorf("While building the diagnostics, a panic was encountered.\nThis is a bug in diagnostics. Stack trace follows : \n%v", r))
+			}
+		}()
+		detected, detectWarnings, detectErrors := o.detectClientConfig() // may log and return problems
+		for _, warn := range detectWarnings {
+			warnings = append(warnings, warn)
 		}
+		for _, err := range detectErrors {
+			errors = append(errors, err)
+		}
+		if !detected { // there just plain isn't any client config file available
+			o.Logger.Notice("discNoClientConf", "No client configuration specified; skipping client and cluster diagnostics.")
+		} else if rawConfig, err := o.buildRawConfig(); rawConfig == nil { // client config is totally broken - won't parse etc (problems may have been detected and logged)
+			o.Logger.Errorf("discBrokenClientConf", "Client configuration failed to load; skipping client and cluster diagnostics due to error: {{.error}}", log.Hash{"error": err.Error()})
+			errors = append(errors, err)
+		} else {
+			if err != nil { // error encountered, proceed with caution
+				o.Logger.Errorf("discClientConfErr", "Client configuration loading encountered an error, but proceeding anyway. Error was:\n{{.error}}", log.Hash{"error": err.Error()})
+				errors = append(errors, err)
+			}
+			if clientDiags, ok, err := o.buildClientDiagnostics(rawConfig); ok {
+				diagnostics["client"] = clientDiags
+			} else if err != nil {
+				failed = true
+				errors = append(errors, err)
+			}
 
-		return clientDiagnosticOptions.RunDiagnostics()
-	}
-
-	return false, nil
-}
-
-func (o OverallDiagnosticsOptions) CheckNode() (bool, error) {
-	if len(o.NodeConfigLocation) == 0 {
-		if _, err := os.Stat(StandardNodeConfigPath); !os.IsNotExist(err) {
-			o.NodeConfigLocation = StandardNodeConfigPath
+			if clusterDiags, ok, err := o.buildClusterDiagnostics(rawConfig); ok {
+				diagnostics["cluster"] = clusterDiags
+			} else if err != nil {
+				failed = true
+				errors = append(errors, err)
+			}
 		}
-	}
 
-	if len(o.NodeConfigLocation) != 0 {
-		masterDiagnosticOptions := &NodeDiagnosticsOptions{
-			RequestedDiagnostics: intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableNodeDiagnostics).List(),
-			NodeConfigLocation:   o.NodeConfigLocation,
-			LogOptions:           o.LogOptions,
-			Logger:               o.Logger,
+		if hostDiags, ok, err := o.buildHostDiagnostics(); ok {
+			diagnostics["host"] = hostDiags
+		} else if err != nil {
+			failed = true
+			errors = append(errors, err)
 		}
+	}()
 
-		return masterDiagnosticOptions.RunDiagnostics()
+	if failed {
+		return failed, kutilerrors.NewAggregate(errors), len(warnings), len(errors)
 	}
 
-	return false, nil
+	failed, err, numWarnings, numErrors := o.Run(diagnostics)
+	numWarnings += len(warnings)
+	numErrors += len(errors)
+	return failed, err, numWarnings, numErrors
 }
 
-func (o OverallDiagnosticsOptions) CheckMaster() (bool, error) {
-	if len(o.MasterConfigLocation) == 0 {
-		if _, err := os.Stat(StandardMasterConfigPath); !os.IsNotExist(err) {
-			o.MasterConfigLocation = StandardMasterConfigPath
+func (o DiagnosticsOptions) Run(diagnostics map[string][]types.Diagnostic) (bool, error, int, int) {
+	warnCount := 0
+	errorCount := 0
+	for area, areaDiagnostics := range diagnostics {
+		for _, diagnostic := range areaDiagnostics {
+			func() { // wrap diagnostic panic nicely in case of developer error
+				defer func() {
+					if r := recover(); r != nil {
+						errorCount += 1
+						o.Logger.Errort("diagPanic",
+							"While running the {{.area}}.{{.name}} diagnostic, a panic was encountered.\nThis is a bug in diagnostics. Stack trace follows : \n{{.error}}",
+							log.Hash{"area": area, "name": diagnostic.Name(), "error": fmt.Sprintf("%v", r)})
+					}
+				}()
+
+				if canRun, reason := diagnostic.CanRun(); !canRun {
+					if reason == nil {
+						o.Logger.Noticet("diagSkip", "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}",
+							log.Hash{"area": area, "name": diagnostic.Name(), "diag": diagnostic.Description()})
+					} else {
+						o.Logger.Noticet("diagSkip", "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}",
+							log.Hash{"area": area, "name": diagnostic.Name(), "diag": diagnostic.Description(), "reason": reason.Error()})
+					}
+					return
+				}
+
+				o.Logger.Noticet("diagRun", "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}",
+					log.Hash{"area": area, "name": diagnostic.Name(), "diag": diagnostic.Description()})
+				r := diagnostic.Check()
+				for _, entry := range r.Logs() {
+					o.Logger.LogEntry(entry)
+				}
+				warnCount += len(r.Warnings())
+				errorCount += len(r.Errors())
+			}()
 		}
-	}
-
-	if len(o.MasterConfigLocation) != 0 {
-		masterDiagnosticOptions := &MasterDiagnosticsOptions{
-			RequestedDiagnostics: intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableMasterDiagnostics).List(),
-			MasterConfigLocation: o.MasterConfigLocation,
-			LogOptions:           o.LogOptions,
-			Logger:               o.Logger,
-		}
-
-		return masterDiagnosticOptions.RunDiagnostics()
-	}
-
-	return false, nil
-}
 
-func NewOptionsCommand() *cobra.Command {
-	cmd := &cobra.Command{
-		Use: "options",
-		Run: func(cmd *cobra.Command, args []string) {
-			cmd.Usage()
-		},
 	}
-
-	templates.UseOptionsTemplates(cmd)
-
-	return cmd
+	return errorCount > 0, nil, warnCount, errorCount
 }
 
 // TODO move upstream
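
Note: the builders above all filter requests through an intersection helper whose body falls outside this excerpt (the trailing "// TODO move upstream" comment introduces it). A plausible minimal version, inferred from its call sites and not taken from the commit itself:

    // Hypothetical sketch of the elided helper: the set of names present in
    // both StringSets, matching calls like
    // intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableClientDiagnostics).List()
    func intersection(s1 util.StringSet, s2 util.StringSet) util.StringSet {
        result := util.NewStringSet()
        for _, item := range s1.List() {
            if s2.Has(item) {
                result.Insert(item)
            }
        }
        return result
    }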
new file mode 100644
... ...
@@ -0,0 +1,76 @@
+package diagnostics
+
+import (
+	"fmt"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+	"github.com/openshift/origin/pkg/diagnostics/host"
+	systemddiagnostics "github.com/openshift/origin/pkg/diagnostics/systemd"
+	"github.com/openshift/origin/pkg/diagnostics/types"
+	"os"
+)
+
+const (
+	StandardMasterConfigPath string = "/etc/openshift/master/master-config.yaml"
+	StandardNodeConfigPath   string = "/etc/openshift/node/node-config.yaml"
+)
+
+var (
+	AvailableHostDiagnostics = util.NewStringSet("AnalyzeLogs", "UnitStatus", "MasterConfigCheck", "NodeConfigCheck")
+)
+
+func (o DiagnosticsOptions) buildHostDiagnostics() ([]types.Diagnostic, bool /* ok */, error) {
+	requestedDiagnostics := intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableHostDiagnostics).List()
+	if len(requestedDiagnostics) == 0 { // no diagnostics to run here
+		return nil, true, nil // don't waste time on discovery
+	}
+	isHost := o.IsHost
+	// check for standard host config paths if not given
+	if len(o.MasterConfigLocation) == 0 {
+		if _, err := os.Stat(StandardMasterConfigPath); !os.IsNotExist(err) {
+			o.MasterConfigLocation = StandardMasterConfigPath
+			isHost = true
+		}
+	} else {
+		isHost = true
+	}
+	if len(o.NodeConfigLocation) == 0 {
+		if _, err := os.Stat(StandardNodeConfigPath); !os.IsNotExist(err) {
+			o.NodeConfigLocation = StandardNodeConfigPath
+			isHost = true
+		}
+	} else {
+		isHost = true
+	}
+
+	// If we're not looking at a host, don't try the diagnostics
+	if !isHost {
+		return nil, true, nil
+	}
+
+	diagnostics := []types.Diagnostic{}
+	systemdUnits := systemddiagnostics.GetSystemdUnits(o.Logger)
+	for _, diagnosticName := range requestedDiagnostics {
+		switch diagnosticName {
+		case "AnalyzeLogs":
+			diagnostics = append(diagnostics, systemddiagnostics.AnalyzeLogs{systemdUnits})
+
+		case "UnitStatus":
+			diagnostics = append(diagnostics, systemddiagnostics.UnitStatus{systemdUnits})
+
+		case "MasterConfigCheck":
+			if len(o.MasterConfigLocation) > 0 {
+				diagnostics = append(diagnostics, host.MasterConfigCheck{o.MasterConfigLocation})
+			}
+
+		case "NodeConfigCheck":
+			if len(o.NodeConfigLocation) > 0 {
+				diagnostics = append(diagnostics, host.NodeConfigCheck{o.NodeConfigLocation})
+			}
+
+		default:
+			return diagnostics, false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
+		}
+	}
+
+	return diagnostics, true, nil
+}
deleted file mode 100644
... ...
@@ -1,143 +0,0 @@
-package diagnostics
-
-import (
-	"fmt"
-	"io"
-	"os"
-
-	"github.com/spf13/cobra"
-
-	kcmdutil "github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util"
-	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
-
-	diagnosticflags "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
-	"github.com/openshift/origin/pkg/diagnostics/log"
-	masterdiagnostics "github.com/openshift/origin/pkg/diagnostics/master"
-	systemddiagnostics "github.com/openshift/origin/pkg/diagnostics/systemd"
-	diagnostictypes "github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
-)
-
-const (
-	MasterDiagnosticsRecommendedName = "master"
-
-	StandardMasterConfigPath string = "/etc/openshift/master/master-config.yaml"
-)
-
-var (
-	AvailableMasterDiagnostics = util.NewStringSet("AnalyzeLogs", "UnitStatus", "MasterConfigCheck")
-)
-
-// user options for openshift-diagnostics client command
-type MasterDiagnosticsOptions struct {
-	RequestedDiagnostics util.StringList
-
-	MasterConfigLocation string
-
-	LogOptions *log.LoggerOptions
-	Logger     *log.Logger
-}
-
-const longMasterDescription = `
-OpenShift Diagnostics
-
-This command helps you understand and troubleshoot a running OpenShift
-master. It is intended to be run from the same context as the master
-(where "openshift start" or "openshift start master" is run, possibly from
-systemd or inside a container) and with the same configuration options.
-
-    $ %s
-`
-
-func NewMasterCommand(name string, fullName string, out io.Writer) *cobra.Command {
-	o := &MasterDiagnosticsOptions{
-		RequestedDiagnostics: AvailableMasterDiagnostics.List(),
-		LogOptions:           &log.LoggerOptions{Out: out},
-	}
-
-	cmd := &cobra.Command{
-		Use:   name,
-		Short: "Troubleshoot an OpenShift v3 master.",
-		Long:  fmt.Sprintf(longMasterDescription, fullName),
-		Run: func(c *cobra.Command, args []string) {
-			kcmdutil.CheckErr(o.Complete())
-
-			failed, err := o.RunDiagnostics()
-			o.Logger.Summary()
-			o.Logger.Finish()
-
-			kcmdutil.CheckErr(err)
-			if failed {
-				os.Exit(255)
-			}
-		},
-	}
-
-	cmd.SetOutput(out) // for output re: usage / help
-
-	cmd.Flags().StringVar(&o.MasterConfigLocation, "master-config", "", "path to master config file")
-	diagnosticflags.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, diagnosticflags.RecommendedLoggerOptionFlags())
-	diagnosticflags.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, diagnosticflags.NewRecommendedDiagnosticFlag())
-
-	return cmd
-}
-
-func (o *MasterDiagnosticsOptions) Complete() error {
-	// set the master config location if it hasn't been set and we find it in an expected location
-	if len(o.MasterConfigLocation) == 0 {
-		if _, err := os.Stat(StandardMasterConfigPath); !os.IsNotExist(err) {
-			o.MasterConfigLocation = StandardMasterConfigPath
-		}
-
-	}
-
-	var err error
-	o.Logger, err = o.LogOptions.NewLogger()
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func (o MasterDiagnosticsOptions) RunDiagnostics() (bool, error) {
-	diagnostics := map[string]diagnostictypes.Diagnostic{}
-
-	// if we don't have a master config file, then there's no work to do
-	if len(o.MasterConfigLocation) == 0 {
-		// TODO remove MasterConfigCheck from the list
-	}
-
-	systemdUnits := systemddiagnostics.GetSystemdUnits(o.Logger)
-
-	for _, diagnosticName := range o.RequestedDiagnostics {
-		switch diagnosticName {
-		case "AnalyzeLogs":
-			diagnostics[diagnosticName] = systemddiagnostics.AnalyzeLogs{systemdUnits, o.Logger}
-
-		case "UnitStatus":
-			diagnostics[diagnosticName] = systemddiagnostics.UnitStatus{systemdUnits, o.Logger}
-
-		case "MasterConfigCheck":
-			diagnostics[diagnosticName] = masterdiagnostics.MasterConfigCheck{o.MasterConfigLocation, o.Logger}
-
-		default:
-			return false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
-		}
-	}
-
-	for name, diagnostic := range diagnostics {
-		if canRun, reason := diagnostic.CanRun(); !canRun {
-			if reason == nil {
-				o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "master", "name": name, "diag": diagnostic.Description()}})
-			} else {
-				o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}", TemplateData: map[string]string{"area": "master", "name": name, "diag": diagnostic.Description(), "reason": reason.Error()}})
-			}
-			continue
-		}
-
-		o.Logger.Noticem(log.Message{ID: "diagRun", Template: "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "master", "name": name, "diag": diagnostic.Description()}})
-		diagnostic.Check()
-	}
-
-	return o.Logger.ErrorsSeen(), nil
-}
deleted file mode 100644
... ...
@@ -1,142 +0,0 @@
-package diagnostics
-
-import (
-	"fmt"
-	"io"
-	"os"
-
-	"github.com/spf13/cobra"
-
-	kcmdutil "github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util"
-	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
-
-	diagnosticflags "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
-	"github.com/openshift/origin/pkg/diagnostics/log"
-	nodediagnostics "github.com/openshift/origin/pkg/diagnostics/node"
-	systemddiagnostics "github.com/openshift/origin/pkg/diagnostics/systemd"
-	diagnostictypes "github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
-)
-
-const (
-	NodeDiagnosticsRecommendedName = "node"
-
-	StandardNodeConfigPath string = "/etc/openshift/node/node-config.yaml"
-)
-
-var (
-	AvailableNodeDiagnostics = util.NewStringSet("AnalyzeLogs", "UnitStatus", "NodeConfigCheck")
-)
-
-// user options for openshift-diagnostics client command
-type NodeDiagnosticsOptions struct {
-	RequestedDiagnostics util.StringList
-
-	NodeConfigLocation string
-
-	LogOptions *log.LoggerOptions
-	Logger     *log.Logger
-}
-
-const longNodeDescription = `
-OpenShift Diagnostics
-
-This command helps you understand and troubleshoot a running OpenShift
-node. It is intended to be run from the same context as the node
-(where "openshift start" or "openshift start node" is run, possibly from
-systemd or inside a container) and with the same configuration options.
-
-    $ %s
-`
-
-func NewNodeCommand(name string, fullName string, out io.Writer) *cobra.Command {
-	o := &NodeDiagnosticsOptions{
-		RequestedDiagnostics: AvailableNodeDiagnostics.List(),
-		LogOptions:           &log.LoggerOptions{Out: out},
-	}
-
-	cmd := &cobra.Command{
-		Use:   name,
-		Short: "Troubleshoot an OpenShift v3 node.",
-		Long:  fmt.Sprintf(longNodeDescription, fullName),
-		Run: func(c *cobra.Command, args []string) {
-			kcmdutil.CheckErr(o.Complete())
-
-			failed, err := o.RunDiagnostics()
-			o.Logger.Summary()
-			o.Logger.Finish()
-
-			kcmdutil.CheckErr(err)
-			if failed {
-				os.Exit(255)
-			}
-		},
-	}
-
-	cmd.SetOutput(out) // for output re: usage / help
-
-	cmd.Flags().StringVar(&o.NodeConfigLocation, "node-config", "", "path to node config file")
-	diagnosticflags.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, diagnosticflags.RecommendedLoggerOptionFlags())
-	diagnosticflags.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, diagnosticflags.NewRecommendedDiagnosticFlag())
-
-	return cmd
-}
-
-func (o *NodeDiagnosticsOptions) Complete() error {
-	// set the node config location if it hasn't been set and we find it in an expected location
-	if len(o.NodeConfigLocation) == 0 {
-		if _, err := os.Stat(StandardNodeConfigPath); !os.IsNotExist(err) {
-			o.NodeConfigLocation = StandardNodeConfigPath
-		}
-	}
-
-	var err error
-	o.Logger, err = o.LogOptions.NewLogger()
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func (o NodeDiagnosticsOptions) RunDiagnostics() (bool, error) {
-	diagnostics := map[string]diagnostictypes.Diagnostic{}
-
-	// if we don't have a node config file, then there's no work to do
-	if len(o.NodeConfigLocation) == 0 {
-		// TODO remove NodeConfigCheck from the list
-	}
-
-	systemdUnits := systemddiagnostics.GetSystemdUnits(o.Logger)
-
-	for _, diagnosticName := range o.RequestedDiagnostics {
-		switch diagnosticName {
-		case "AnalyzeLogs":
-			diagnostics[diagnosticName] = systemddiagnostics.AnalyzeLogs{systemdUnits, o.Logger}
-
-		case "UnitStatus":
-			diagnostics[diagnosticName] = systemddiagnostics.UnitStatus{systemdUnits, o.Logger}
-
-		case "NodeConfigCheck":
-			diagnostics[diagnosticName] = nodediagnostics.NodeConfigCheck{o.NodeConfigLocation, o.Logger}
-
-		default:
-			return false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
-		}
-	}
-
-	for name, diagnostic := range diagnostics {
-		if canRun, reason := diagnostic.CanRun(); !canRun {
-			if reason == nil {
-				o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "node", "name": name, "diag": diagnostic.Description()}})
-			} else {
-				o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}", TemplateData: map[string]string{"area": "node", "name": name, "diag": diagnostic.Description(), "reason": reason.Error()}})
-			}
-			continue
-		}
-
-		o.Logger.Noticem(log.Message{ID: "diagRun", Template: "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "node", "name": name, "diag": diagnostic.Description()}})
-		diagnostic.Check()
-	}
-
-	return o.Logger.ErrorsSeen(), nil
-}
... ...
@@ -8,27 +8,27 @@ import (
 	"github.com/openshift/origin/pkg/diagnostics/log"
 )
 
-type RecommendedLoggerOptionsFlags struct {
+type LoggerOptionFlags struct {
 	Level  FlagInfo
 	Format FlagInfo
 }
 
 // default overrideable flag specifications to be bound to options.
-func RecommendedLoggerOptionFlags() RecommendedLoggerOptionsFlags {
-	return RecommendedLoggerOptionsFlags{
+func RecommendedLoggerOptionFlags() LoggerOptionFlags {
+	return LoggerOptionFlags{
 		Level:  FlagInfo{FlagLevelName, "l", "1", "Level of diagnostic output: 4: Error, 3: Warn, 2: Notice, 1: Info, 0: Debug"},
 		Format: FlagInfo{FlagFormatName, "o", "text", "Output format: text|json|yaml"},
 	}
 }
 
-func BindLoggerOptionFlags(cmdFlags *pflag.FlagSet, loggerOptions *log.LoggerOptions, flags RecommendedLoggerOptionsFlags) {
+func BindLoggerOptionFlags(cmdFlags *pflag.FlagSet, loggerOptions *log.LoggerOptions, flags LoggerOptionFlags) {
 	flags.Level.BindIntFlag(cmdFlags, &loggerOptions.Level)
 	flags.Format.BindStringFlag(cmdFlags, &loggerOptions.Format)
 }
 
 // default overrideable flag specifications to be bound to options.
 func NewRecommendedDiagnosticFlag() FlagInfo {
-	return FlagInfo{FlagDiagnosticsName, "d", "", `comma-separated list of diagnostic names to run, e.g. "systemd.AnalyzeLogs"`}
+	return FlagInfo{FlagDiagnosticsName, "d", "", `comma-separated list of diagnostic names to run, e.g. "AnalyzeLogs"`}
 }
 
 func BindDiagnosticFlag(cmdFlags *pflag.FlagSet, diagnostics *util.StringList, flagInfo FlagInfo) {
... ...
@@ -14,10 +14,11 @@ type FlagInfo kclientcmd.FlagInfo // reuse to add methods
 // with tweaked definitions in different contexts if necessary.
 
 func (i FlagInfo) BindStringFlag(flags *pflag.FlagSet, target *string) {
-	// assume flags with no longname are not desired
-	if len(i.LongName) > 0 {
-		flags.StringVarP(target, i.LongName, i.ShortName, i.Default, i.Description)
-	}
+	kclientcmd.FlagInfo(i).BindStringFlag(flags, target)
+}
+
+func (i FlagInfo) BindBoolFlag(flags *pflag.FlagSet, target *bool) {
+	kclientcmd.FlagInfo(i).BindBoolFlag(flags, target)
 }
 
 func (i FlagInfo) BindIntFlag(flags *pflag.FlagSet, target *int) {
... ...
@@ -29,15 +30,6 @@ func (i FlagInfo) BindIntFlag(flags *pflag.FlagSet, target *int) {
 	}
 }
 
-func (i FlagInfo) BindBoolFlag(flags *pflag.FlagSet, target *bool) {
-	// assume flags with no longname are not desired
-	if len(i.LongName) > 0 {
-		// try to parse Default as a bool.  If it fails, assume false
-		boolVal, _ := strconv.ParseBool(i.Default)
-		flags.BoolVarP(target, i.LongName, i.ShortName, boolVal, i.Description)
-	}
-}
-
 func (i FlagInfo) BindListFlag(flags *pflag.FlagSet, target *kutil.StringList) {
 	// assume flags with no longname are not desired
 	if len(i.LongName) > 0 {
... ...
@@ -46,12 +38,11 @@ func (i FlagInfo) BindListFlag(flags *pflag.FlagSet, target *kutil.StringList) {
 }
 
 const (
-	FlagAllClientConfigName = "client-config"
-	FlagAllMasterConfigName = "master-config"
-	FlagAllNodeConfigName   = "node-config"
-	FlagDiagnosticsName     = "diagnostics"
-	FlagLevelName           = "diaglevel"
-	FlagFormatName          = "output"
-	FlagMasterConfigName    = "config"
-	FlagNodeConfigName      = "config"
+	FlagMasterConfigName   = "master-config"
+	FlagNodeConfigName     = "node-config"
+	FlagClusterContextName = "cluster-context"
+	FlagDiagnosticsName    = "diagnostics"
+	FlagLevelName          = "diaglevel"
+	FlagFormatName         = "output"
+	FlagIsHostName         = "host"
 )
... ...
@@ -2,7 +2,7 @@ OpenShift v3 Diagnostics
 ========================
 
 This is a tool to help administrators and users resolve common problems
-that occur with OpenShift v3 deployments. It is currently (May 2015)
+that occur with OpenShift v3 deployments. It will likely remain
 under continuous development as the OpenShift Origin project progresses.
 
 The goals of the diagnostics tool are summarized in this [Trello
... ...
@@ -22,32 +22,21 @@ added to the `openshift` binary itself so that wherever there is an
 OpenShift server or client, the diagnostics can run in the exact same
 environment.
 
-`openshift ex diagnostics` subcommands for master, node, and client
-provide flags to mimic the configurations for those respective components,
-so that running diagnostics against a component should be as simple as
-supplying the same flags that would invoke the component. So,
-for example, if a master is started with:
-
-    openshift start master --public-hostname=...
-
-Then diagnostics against that master would simply be run as:
-
-    openshift ex diagnostics master --public-hostname=...
-
-In this way it should be possible to invoke diagnostics against any
-given environment.
+Diagnostics looks for config files in standard locations. If not found,
+related diagnostics are just skipped. Non-standard locations can be
+specified with flags.
 
 Host environment
 ================
 
-However, master/node diagnostics will be most useful in a specific
-target environment, which is a deployment using Enterprise RPMs and
-ansible deployment logic. This provides two major benefits:
+Master/node diagnostics will be most useful in a specific target
+environment, which is a deployment using RPMs and ansible deployment
+logic. This provides two major benefits:
 
 * master/node configuration is based on a configuration file in a standard location
 * all components log to journald
 
-Having configuration file in standard locations means you will generally
+Having configuration files in standard locations means you will generally
 not even need to specify where to find them. Running:
 
     openshift ex diagnostics
... ...
@@ -71,14 +60,54 @@ Client environment
 ==================
 
 The user may only have access as an ordinary user, as a cluster-admin
-user, or may have admin on a host where OpenShift master or node services
-are operating. The diagnostics will attempt to use as much access as
-the user has available.
+user, and/or may be running on a host where OpenShift master or node
+services are operating. The diagnostics will attempt to use as much
+access as the user has available.
 
 A client with ordinary access should be able to diagnose its connection
-to the master and look for problems in builds and deployments.
-
-A client with cluster-admin access should be able to diagnose the same
-things for every project in the deployment, as well as infrastructure
-status.
+to the master and look for problems in builds and deployments for the
+current context.
+
+A client with cluster-admin access should be able to diagnose the
+status of infrastructure.
+
+Writing diagnostics
+===================
+
+Developers are encouraged to add to the available diagnostics as they
+encounter problems that are not easily communicated in the normal
+operations of the program, for example components with misconfigured
+connections, problems that are buried in logs, etc. The sanity you
+save may be your own.
+
+A diagnostic is an object that conforms to the Diagnostic interface
+(see pkg/diagnostics/types/diagnostic.go). The diagnostic object should
+be built in one of the builders in the pkg/cmd/experimental/diagnostics
+package (based on whether it depends on client, cluster-admin, or host
+configuration). When executed, the diagnostic logs its findings into
+a result object. It should be assumed that they may run in parallel.
+
+Diagnostics should prefer providing information over perfect accuracy,
+as they are the first line of (self-)support for users. On the other
+hand, judgment should be exercised to prevent sending users down useless
+paths or flooding them with non-issues that obscure real problems.
+
+* Errors should be reserved for things that are almost certainly broken
+  or causing problems, for example a broken URL.
+* Warnings indicate issues that may be a problem but could be valid for
+  some configurations / situations, for example a node being disabled.
+
+Enabling automation
+===================
+
+Diagnostic messages are designed to be logged either for human consumption
+("text" format) or for scripting/automation ("yaml" or "json" formats). So
+messages should:
+
+* Have an ID that is unique and unchanging, such that automated alerts
+  could filter on specific IDs rather than rely on message text or level.
+* Log any data that might be relevant in an automated alert as
+  template data; for example, when a node is down, include the name of
+  the node so that automation could decide how important it is.
+* Not put anything in message template data that cannot be serialized.
 
... ...
@@ -3,14 +3,25 @@ package client
 import (
 	"errors"
 	"fmt"
+	"regexp"
+	"strings"
 
 	kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	kclientcmd "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd"
 	kclientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
 
+	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
 	"github.com/openshift/origin/pkg/diagnostics/log"
-	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
+	"github.com/openshift/origin/pkg/diagnostics/types"
 )
 
+type ConfigContext struct {
+	RawConfig   *kclientcmdapi.Config
+	ContextName string
+}
+
 const (
 	currentContextMissing = `Your client config specifies a current context of '{{.context}}'
 which is not defined; it is likely that a mistake was introduced while
... ...
@@ -22,20 +33,145 @@ useful to use this as a base if available.`
22 22
 	currentContextSummary = `The current context from client config is '{{.context}}'
23 23
 This will be used by default to contact your OpenShift server.
24 24
 `
25
+	contextDesc = `
26
+For client config context '{{.context}}':
27
+The server URL is '{{.server}}'
28
+The user authentication is '{{.user}}'
29
+The current project is '{{.project}}'
30
+`
31
+	currContextDesc = `
32
+The current client config context is '{{.context}}':
33
+The server URL is '{{.server}}'
34
+The user authentication is '{{.user}}'
35
+The current project is '{{.project}}'
36
+`
37
+	clientNoResolve = `
38
+This usually means that the hostname does not resolve to an IP.
39
+Hostnames should usually be resolved via DNS or an /etc/hosts file.
40
+Ensure that the hostname resolves correctly from your host before proceeding.
41
+Of course, your config could also simply have the wrong hostname specified.
42
+`
43
+	clientUnknownCa = `
44
+This means that we cannot validate the certificate in use by the
45
+OpenShift API server, so we cannot securely communicate with it.
46
+Connections could be intercepted and your credentials stolen.
47
+
48
+Since the server certificate we see when connecting is not validated
49
+by public certificate authorities (CAs), you probably need to specify a
50
+certificate from a private CA to validate the connection.
51
+
52
+Your config may be specifying the wrong CA cert, or none, or there
53
+could actually be a man-in-the-middle attempting to intercept your
54
+connection.  If you are unconcerned about any of this, you can add the
55
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification,
56
+but this is risky and should not be necessary.
57
+** Connections could be intercepted and your credentials stolen. **
58
+`
59
+	clientUnneededCa = `
60
+This means that for client connections to the OpenShift API server, you
61
+(or your kubeconfig) specified both a validating certificate authority
62
+and that the client should bypass connection security validation.
63
+
64
+This is not allowed because it is likely to be a mistake.
65
+
66
+If you want to use --insecure-skip-tls-verify to bypass security (which
67
+is usually a bad idea anyway), then you need to also clear the CA cert
68
+from your command line options or kubeconfig file(s). Of course, it
69
+would be far better to obtain and use a correct CA cert.
70
+`
71
+	clientInvCertName = `
72
+This means that the certificate in use by the OpenShift API server
73
+(master) does not match the hostname by which you are addressing it:
74
+  %s
75
+so a secure connection is not allowed. In theory, this *could* mean that
76
+someone is intercepting your connection and presenting a certificate
77
+that is valid but for a different server, which is why secure validation
78
+fails in this case.
79
+
80
+However, the most likely explanation is that the server certificate
81
+needs to be updated to include the name you are using to reach it.
82
+
83
+If the OpenShift server is generating its own certificates (which
84
+is default), then the --public-master flag on the OpenShift master is
85
+usually the easiest way to do this. If you need something more complicated
86
+(for instance, multiple public addresses for the API, or your own CA),
87
+then you will need to custom-generate the server certificate with the
88
+right names yourself.
89
+
90
+If you are unconcerned about any of this, you can add the
91
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification,
92
+but this is risky and should not be necessary.
93
+** Connections could be intercepted and your credentials stolen. **
94
+`
95
+	clientConnRefused = `
96
+This means that when we tried to connect to the OpenShift API
97
+server (master), we reached the host, but nothing accepted the port
98
+connection. This could mean that the OpenShift master is stopped, or
99
+that a firewall or security policy is blocking access at that port.
100
+
101
+You will not be able to connect or do anything at all with OpenShift
102
+until this server problem is resolved or you specify a corrected
103
+server address.`
104
+
105
+	clientConnTimeout = `
106
+This means that when we tried to connect to the OpenShift API server
107
+(master), we could not reach the host at all.
108
+* You may have specified the wrong host address.
109
+* This could mean the host is completely unavailable (down).
110
+* This could indicate a routing problem or a firewall that simply
111
+  drops requests rather than responding by resetting the connection.
112
+* It does not generally mean that DNS name resolution failed (which
113
+  would be a different error) though the problem could be that it
114
+  gave the wrong address.`
115
+	clientMalformedHTTP = `
116
+This means that when we tried to connect to the OpenShift API server
117
+(master) with a plain HTTP connection, the server did not speak
118
+HTTP back to us. The most common explanation is that a secure server
119
+is listening but you specified an http: connection instead of https:.
120
+There could also be another service listening at the intended port
121
+speaking some other protocol entirely.
122
+
123
+You will not be able to connect or do anything at all with OpenShift
124
+until this server problem is resolved or you specify a corrected
125
+server address.`
126
+	clientMalformedTLS = `
127
+This means that when we tried to connect to the OpenShift API server
128
+(master) with a secure HTTPS connection, the server did not speak
129
+HTTPS back to us. The most common explanation is that the server
130
+listening at that port is not the secure server you expected - it
131
+may be a non-secure HTTP server or the wrong service may be
132
+listening there, or you may have specified an incorrect port.
133
+
134
+You will not be able to connect or do anything at all with OpenShift
135
+until this server problem is resolved or you specify a corrected
136
+server address.`
137
+	clientUnauthn = `
138
+This means that when we tried to make a request to the OpenShift API
139
+server, your kubeconfig did not present valid credentials to
140
+authenticate your client. Credentials generally consist of a client
141
+key/certificate or an access token. Your kubeconfig may not have
142
+presented any, or they may be invalid.`
143
+	clientUnauthz = `
144
+This means that when we tried to make a request to the OpenShift API
145
+server, the request required credentials that were not presented.
146
+This can happen when an authentication token expires. Try logging in
147
+with this user again.`
25 148
 )
26 149
 
27
-type ConfigContext struct {
28
-	KubeConfig  *kclientcmdapi.Config
29
-	ContextName string
150
+var (
151
+	invalidCertNameRx = regexp.MustCompile("x509: certificate is valid for (\\S+, )+not (\\S+)")
152
+)
30 153
 
31
-	Log *log.Logger
154
+func (d ConfigContext) Name() string {
155
+	return fmt.Sprintf("ConfigContext[%s]", d.ContextName)
32 156
 }
33 157
 
34 158
 func (d ConfigContext) Description() string {
35
-	return "Test that client config contexts have no undefined references"
159
+	return "Validate client config context is complete and has connectivity"
36 160
 }
161
+
37 162
 func (d ConfigContext) CanRun() (bool, error) {
38
-	if d.KubeConfig == nil {
163
+	if d.RawConfig == nil {
39 164
 		// TODO make prettier?
40 165
 		return false, errors.New("There is no client config file")
41 166
 	}
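
The shape every diagnostic shares after this refactor (Name and Description to identify it, CanRun to gate it, Check returning a result object instead of logging directly) can be exercised in isolation. A minimal sketch follows; the Result type here is a deliberately simplified stand-in for types.DiagnosticResult, which carries full log entries, so only the control flow is faithful:

    package main

    import "fmt"

    // Result is a simplified stand-in for types.DiagnosticResult:
    // it accumulates findings rather than logging them immediately.
    type Result struct {
        Origin string
        Errors []string
    }

    func (r *Result) Errorf(format string, a ...interface{}) {
        r.Errors = append(r.Errors, fmt.Sprintf(format, a...))
    }

    // Diagnostic mirrors the method set used throughout this commit.
    type Diagnostic interface {
        Name() string
        Description() string
        CanRun() (bool, error)
        Check() *Result
    }

    // alwaysBroken is a toy diagnostic that always reports a problem.
    type alwaysBroken struct{}

    func (alwaysBroken) Name() string          { return "AlwaysBroken" }
    func (alwaysBroken) Description() string   { return "Always finds a problem" }
    func (alwaysBroken) CanRun() (bool, error) { return true, nil }
    func (d alwaysBroken) Check() *Result {
        r := &Result{Origin: d.Name()}
        r.Errorf("something is wrong")
        return r
    }

    func main() {
        errorsSeen := 0
        for _, d := range []Diagnostic{alwaysBroken{}} {
            if ok, err := d.CanRun(); !ok {
                fmt.Printf("skipping %s: %v\n", d.Name(), err)
                continue
            }
            r := d.Check() // touches no shared state; log results afterward
            errorsSeen += len(r.Errors)
            for _, e := range r.Errors {
                fmt.Printf("ERROR [%s]: %s\n", r.Origin, e)
            }
        }
        fmt.Printf("Errors seen: %d\n", errorsSeen)
    }

Because Check only returns data and touches no shared state, results from several diagnostics could in principle be computed concurrently and logged in order afterward.
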
... ...
@@ -46,13 +182,13 @@ func (d ConfigContext) CanRun() (bool, error) {
46 46
 
47 47
 	return true, nil
48 48
 }
49
-func (d ConfigContext) Check() (bool, []log.Message, []error, []error) {
50
-	if _, err := d.CanRun(); err != nil {
51
-		return false, nil, nil, []error{err}
52
-	}
53 49
 
54
-	isDefaultContext := d.KubeConfig.CurrentContext == d.ContextName
50
+func (d ConfigContext) Check() *types.DiagnosticResult {
51
+	r := types.NewDiagnosticResult("ConfigContext")
55 52
 
53
+	isDefaultContext := d.RawConfig.CurrentContext == d.ContextName
54
+
55
+	// prepare bad news message
56 56
 	errorKey := "clientCfgError"
57 57
 	unusableLine := fmt.Sprintf("The client config context '%s' is unusable", d.ContextName)
58 58
 	if isDefaultContext {
... ...
@@ -60,40 +196,92 @@ func (d ConfigContext) Check() (bool, []log.Message, []error, []error) {
60 60
 		unusableLine = fmt.Sprintf("The current client config context '%s' is unusable", d.ContextName)
61 61
 	}
62 62
 
63
-	context, exists := d.KubeConfig.Contexts[d.ContextName]
63
+	// check that the context and its constituents are defined in the kubeconfig
64
+	context, exists := d.RawConfig.Contexts[d.ContextName]
64 65
 	if !exists {
65
-		err := diagnostic.NewDiagnosticError(errorKey, "", fmt.Errorf(unusableLine+":\n Client config context '%s' is not defined.", d.ContextName))
66
-		d.Log.Error(err.ID, err.Cause.Error())
67
-		return false, nil, nil, []error{err}
66
+		r.Errorf(errorKey, nil, "%s:\n Client config context '%s' is not defined.", unusableLine, d.ContextName)
67
+		return r
68 68
 	}
69
-
70 69
 	clusterName := context.Cluster
71
-	cluster, exists := d.KubeConfig.Clusters[clusterName]
70
+	cluster, exists := d.RawConfig.Clusters[clusterName]
72 71
 	if !exists {
73
-
74
-		err := diagnostic.NewDiagnosticError(errorKey, "", fmt.Errorf(unusableLine+":\n Client config context '%s' has a cluster '%s' which is not defined.", d.ContextName, clusterName))
75
-		d.Log.Error(err.ID, err.Cause.Error())
76
-		return false, nil, nil, []error{err}
72
+		r.Errorf(errorKey, nil, "%s:\n Client config context '%s' has a cluster '%s' which is not defined.", unusableLine, d.ContextName, clusterName)
73
+		return r
77 74
 	}
78 75
 	authName := context.AuthInfo
79
-	if _, exists := d.KubeConfig.AuthInfos[authName]; !exists {
80
-
81
-		err := diagnostic.NewDiagnosticError(errorKey, "", fmt.Errorf(unusableLine+":\n Client config context '%s' has a user identity '%s' which is not defined.", d.ContextName, authName))
82
-		d.Log.Error(err.ID, err.Cause.Error())
83
-		return false, nil, nil, []error{err}
76
+	if _, exists := d.RawConfig.AuthInfos[authName]; !exists {
77
+		r.Errorf(errorKey, nil, "%s:\n Client config context '%s' has a user identity '%s' which is not defined.", unusableLine, d.ContextName, authName)
78
+		return r
84 79
 	}
85 80
 
81
+	// we found a fully-defined context
86 82
 	project := context.Namespace
87 83
 	if project == "" {
88 84
 		project = kapi.NamespaceDefault // OpenShift/k8s fills this in if missing
85
+	}
86
+	msgData := log.Hash{"context": d.ContextName, "server": cluster.Server, "user": authName, "project": project}
87
+	msgText := contextDesc
88
+	if isDefaultContext {
89
+		msgText = currContextDesc
90
+	}
89 91
 
92
+	// Actually send a request to see if context has connectivity.
93
+	// Note: we cannot reuse factories as they cache the clients, so build new factory for each context.
94
+	osClient, _, err := osclientcmd.NewFactory(kclientcmd.NewDefaultClientConfig(*d.RawConfig, &kclientcmd.ConfigOverrides{Context: *context})).Clients()
95
+	// client creation now fails if it cannot connect to the server, so address connectivity errors below
96
+	if err == nil {
97
+		if projects, projerr := osClient.Projects().List(labels.Everything(), fields.Everything()); projerr != nil {
98
+			err = projerr
99
+		} else { // success!
100
+			list := []string{}
101
+			for i, project := range projects.Items {
102
+				if i > 9 {
103
+					list = append(list, "...")
104
+					break
105
+				}
106
+				list = append(list, project.Name)
107
+			}
108
+			msgData["projects"] = list
109
+			if len(list) == 0 {
110
+				r.Infot("CCctxSuccess", msgText+"Successfully requested project list, but it is empty, so user has no access to anything.", msgData)
111
+			} else {
112
+				r.Infot("CCctxSuccess", msgText+"Successfully requested project list; has access to project(s):\n  {{.projects}}", msgData)
113
+			}
114
+			return r
115
+		}
90 116
 	}
91 117
 
92
-	// TODO: actually send a request to see if can connect
93
-	message := log.Message{EvaluatedText: fmt.Sprintf("For client config context '%s':\n The server URL is '%s'\nThe user authentication is '%s'\nThe current project is '%s'", d.ContextName, cluster.Server, authName, project)}
94
-	if isDefaultContext {
95
-		message = log.Message{EvaluatedText: fmt.Sprintf("The current client config context is '%s':\n The server URL is '%s'\nThe user authentication is '%s'\nThe current project is '%s'", d.ContextName, cluster.Server, authName, project)}
118
+	// something went wrong; couldn't create client or get project list.
119
+	// interpret the terse error messages with helpful info.
120
+	errMsg := err.Error()
121
+	msgData["errMsg"] = fmt.Sprintf("(%T) %[1]v", err)
122
+	var reason, errId string
123
+	switch {
124
+	case regexp.MustCompile("dial tcp: lookup (\\S+): no such host").MatchString(errMsg):
125
+		errId, reason = "clientNoResolve", clientNoResolve
126
+	case strings.Contains(errMsg, "x509: certificate signed by unknown authority"):
127
+		errId, reason = "clientUnknownCa", clientUnknownCa
128
+	case strings.Contains(errMsg, "specifying a root certificates file with the insecure flag is not allowed"):
129
+		errId, reason = "clientUnneededCa", clientUnneededCa
130
+	case invalidCertNameRx.MatchString(errMsg):
131
+		match := invalidCertNameRx.FindStringSubmatch(errMsg)
132
+		serverHost := match[len(match)-1]
133
+		errId, reason = "clientInvCertName", fmt.Sprintf(clientInvCertName, serverHost)
134
+	case regexp.MustCompile("dial tcp (\\S+): connection refused").MatchString(errMsg):
135
+		errId, reason = "clientConnRefused", clientConnRefused
136
+	case regexp.MustCompile("dial tcp (\\S+): (?:connection timed out|i/o timeout|no route to host)").MatchString(errMsg):
137
+		errId, reason = "clientConnTimeout", clientConnTimeout
138
+	case strings.Contains(errMsg, "malformed HTTP response"):
139
+		errId, reason = "clientMalformedHTTP", clientMalformedHTTP
140
+	case strings.Contains(errMsg, "tls: oversized record received with length"):
141
+		errId, reason = "clientMalformedTLS", clientMalformedTLS
142
+	case strings.Contains(errMsg, `403 Forbidden: Forbidden: "/osapi/v1beta1/projects?namespace=" denied by default`):
143
+		errId, reason = "clientUnauthn", clientUnauthn
144
+	case regexp.MustCompile("401 Unauthorized: Unauthorized$").MatchString(errMsg):
145
+		errId, reason = "clientUnauthz", clientUnauthz
146
+	default:
147
+		errId, reason = "clientUnknownConnErr", `Diagnostics does not have an explanation for what this means. Please report this error so one can be added.`
96 148
 	}
97
-	d.Log.LogMessage(log.InfoLevel, message)
98
-	return true, []log.Message{message}, nil, nil
149
+	r.Errort(errId, err, msgText+"{{.errMsg}}\n"+reason, msgData)
150
+	return r
99 151
 }
100 152
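
The long switch above is essentially a first-match table from raw error text to a stable ID plus a human explanation. Here is a standalone sketch of the same technique using only the standard library; the patterns are a subset of those in the diff, and the hostname in main is hypothetical:

    package main

    import (
        "errors"
        "fmt"
        "regexp"
        "strings"
    )

    // matcher pairs a recognizer with an error ID and an explanation.
    type matcher struct {
        match   func(string) bool
        id      string
        explain string
    }

    // Compiling the regexps once up front avoids the cost of the
    // MustCompile-per-call pattern used by some cases in the diff.
    var matchers = []matcher{
        {regexp.MustCompile(`dial tcp: lookup (\S+): no such host`).MatchString,
            "clientNoResolve", "the hostname does not resolve to an IP"},
        {func(s string) bool {
            return strings.Contains(s, "x509: certificate signed by unknown authority")
        }, "clientUnknownCa", "the server certificate could not be validated"},
        {regexp.MustCompile(`dial tcp (\S+): connection refused`).MatchString,
            "clientConnRefused", "the host was reached but nothing accepted the port"},
    }

    // classify returns the first matching ID and explanation, with a default.
    func classify(err error) (string, string) {
        msg := err.Error()
        for _, m := range matchers {
            if m.match(msg) {
                return m.id, m.explain
            }
        }
        return "clientUnknownConnErr", "no explanation known for this error"
    }

    func main() {
        id, why := classify(errors.New("dial tcp: lookup master.example.com: no such host"))
        fmt.Printf("%s: %s\n", id, why) // clientNoResolve: the hostname does not resolve to an IP
    }
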
new file mode 100644
... ...
@@ -0,0 +1,151 @@
0
+package client
1
+
2
+import (
3
+	"fmt"
4
+	"io/ioutil"
5
+	"os"
6
+
7
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd"
8
+	flag "github.com/spf13/pflag"
9
+
10
+	"github.com/openshift/origin/pkg/cmd/cli/config"
11
+	"github.com/openshift/origin/pkg/diagnostics/log"
12
+	"github.com/openshift/origin/pkg/diagnostics/types"
13
+)
14
+
15
+// This diagnostic is a little special in that it is run separately as a precondition
16
+// in order to determine whether we can run other dependent diagnostics
17
+
18
+type ConfigLoading struct {
19
+	ConfFlagName   string
20
+	ClientFlags    *flag.FlagSet
21
+	successfulLoad bool // set if at least one file loaded
22
+}
23
+
24
+func (d *ConfigLoading) Name() string {
25
+	return "ConfigLoading"
26
+}
27
+
28
+func (d *ConfigLoading) Description() string {
29
+	return "Try to load client config file(s) and report what happens"
30
+}
31
+
32
+func (d *ConfigLoading) CanRun() (bool, error) {
33
+	return true, nil
34
+}
35
+
36
+func (d *ConfigLoading) SuccessfulLoad() bool {
37
+	return d.successfulLoad
38
+}
39
+
40
+func (d *ConfigLoading) Check() *types.DiagnosticResult {
41
+	r := types.NewDiagnosticResult("ConfigLoading")
42
+	confFlagValue := d.ClientFlags.Lookup(d.ConfFlagName).Value.String()
43
+
44
+	var foundPath string
45
+	rules := config.NewOpenShiftClientConfigLoadingRules()
46
+	paths := append([]string{confFlagValue}, rules.Precedence...)
47
+	for index, path := range paths {
48
+		errmsg := ""
49
+		switch index {
50
+		case 0:
51
+			errmsg = fmt.Sprintf("--%s specified that client config should be at %s\n", d.ConfFlagName, path)
52
+		case len(paths) - 1: // config in ~/.kube
53
+		// no error message if it is not there; the user didn't say it would be
54
+		default: // can be multiple paths from the env var in theory; all cases should go here
55
+			if len(os.Getenv(config.OpenShiftConfigPathEnvVar)) != 0 {
56
+				errmsg = fmt.Sprintf("Env var %s specified that client config could be at %s\n", config.OpenShiftConfigPathEnvVar, path)
57
+			}
58
+		}
59
+
60
+		if d.canOpenConfigFile(path, errmsg, r) && foundPath == "" {
61
+			d.successfulLoad = true
62
+			foundPath = path
63
+		}
64
+	}
65
+	if foundPath != "" {
66
+		if confFlagValue != "" && confFlagValue != foundPath {
67
+			// found config but not where --config said
68
+			r.Errorf("discCCnotFlag", nil, `
69
+The client configuration file was not found where the --%s flag indicated:
70
+  %s
71
+A config file was found at the following location:
72
+  %s
73
+If you wish to use this file for client configuration, you can specify it
74
+with the --%[1]s flag, or just not specify the flag.
75
+			`, d.ConfFlagName, confFlagValue, foundPath)
76
+		}
77
+	} else { // not found, check for master-generated ones to recommend
78
+		if confFlagValue != "" {
79
+			r.Errorf("discCCnotFlag", nil, "Did not find config file where --%s=%s indicated", d.ConfFlagName, confFlagValue)
80
+		}
81
+		adminWarningF := `
82
+No client config file was available; however, one exists at
83
+    %[2]s
84
+which may have been generated automatically by the master.
85
+If you want to use this config, you should copy it to the
86
+standard location (%[3]s),
87
+or you can set the environment variable %[1]s:
88
+    export %[1]s=%[2]s
89
+If not, obtain a config file and place it in the standard
90
+location for use by the client and diagnostics.
91
+`
92
+		adminPaths := []string{
93
+			"/etc/openshift/master/admin.kubeconfig",           // enterprise
94
+			"/openshift.local.config/master/admin.kubeconfig",  // origin systemd
95
+			"./openshift.local.config/master/admin.kubeconfig", // origin binary
96
+		}
97
+		// look for it in auto-generated locations when not found properly
98
+		for _, path := range adminPaths {
99
+			msg := fmt.Sprintf("Looking for a possible client config at %s\n", path)
100
+			if d.canOpenConfigFile(path, msg, r) {
101
+				r.Warnf("discCCautoPath", nil, adminWarningF, config.OpenShiftConfigPathEnvVar, path, config.RecommendedHomeFile)
102
+				break
103
+			}
104
+		}
105
+	}
106
+	return r
107
+}
108
+
109
+// ----------------------------------------------------------
110
+// Attempt to open file at path as client config
111
+// If there is a problem and errmsg is set, log an error
112
+func (d ConfigLoading) canOpenConfigFile(path string, errmsg string, r *types.DiagnosticResult) bool {
113
+	var file *os.File
114
+	var err error
115
+	if path == "" { // empty param/envvar
116
+		return false
117
+	} else if file, err = os.Open(path); err == nil {
118
+		r.Debugt("discOpenCC", "Reading client config at {{.path}}", log.Hash{"path": path})
119
+	} else if errmsg == "" {
120
+		r.Debugf("discOpenCCNo", "Could not read client config at %s:\n%#v", path, err)
121
+	} else if os.IsNotExist(err) {
122
+		r.Debug("discOpenCCNoExist", errmsg+"but that file does not exist.")
123
+	} else if os.IsPermission(err) {
124
+		r.Error("discOpenCCNoPerm", err, errmsg+"but lack permission to read that file.")
125
+	} else {
126
+		r.Errorf("discOpenCCErr", err, "%sbut there was an error opening it:\n%#v", errmsg, err)
127
+	}
128
+	if file != nil { // it is open for reading
129
+		defer file.Close()
130
+		if buffer, err := ioutil.ReadAll(file); err != nil {
131
+			r.Errorf("discCCReadErr", err, "Unexpected error while reading client config file (%s): %v", path, err)
132
+		} else if _, err := clientcmd.Load(buffer); err != nil {
133
+			r.Errorf("discCCYamlErr", err, `
134
+Error reading YAML from client config file (%s):
135
+  %v
136
+This file may have been truncated or mis-edited.
137
+Please fix, remove, or obtain a new client config`, file.Name(), err)
138
+		} else {
139
+			r.Infof("discCCRead", "Successfully read a client config file at '%s'", path)
140
+			/* Note, we're not going to use this config file directly.
141
+			 * Instead, we'll defer to the openshift client code to assimilate
142
+			 * flags, env vars, and the potential hierarchy of config files
143
+			 * into an actual configuration that the client uses.
144
+			 * However, for diagnostic purposes, record the files we find.
145
+			 */
146
+			return true
147
+		}
148
+	}
149
+	return false
150
+}
0 151
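
The loop above implements a "first readable file wins" walk over an ordered candidate list: the value of the config flag, then any env-var paths, then the home-directory default. A minimal standalone sketch of that precedence follows; the candidate paths are illustrative, not the actual OpenShift loading rules:

    package main

    import (
        "fmt"
        "os"
        "path/filepath"
        "strings"
    )

    // firstReadable returns the first candidate path that can actually be
    // opened, preserving the precedence order of the list.
    func firstReadable(candidates []string) (string, bool) {
        for _, path := range candidates {
            if path == "" {
                continue // unset flag or env var
            }
            f, err := os.Open(path)
            if err != nil {
                continue // missing or unreadable; try the next candidate
            }
            f.Close()
            return path, true
        }
        return "", false
    }

    func main() {
        home, _ := os.UserHomeDir()
        // Illustrative precedence: a --config style flag value, then a
        // KUBECONFIG-style env var (possibly several paths), then a
        // home-directory default.
        candidates := []string{""} // flag value, empty when not given
        candidates = append(candidates,
            strings.Split(os.Getenv("KUBECONFIG"), string(filepath.ListSeparator))...)
        candidates = append(candidates, filepath.Join(home, ".kube", "config"))

        if path, ok := firstReadable(candidates); ok {
            fmt.Println("using client config at", path)
        } else {
            fmt.Println("no client config found")
        }
    }
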
deleted file mode 100644
... ...
@@ -1,100 +0,0 @@
1
-package client
2
-
3
-import (
4
-	"errors"
5
-	"fmt"
6
-
7
-	kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
8
-	kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
9
-
10
-	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
11
-	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
12
-	"github.com/openshift/origin/pkg/diagnostics/log"
13
-	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
14
-)
15
-
16
-const (
17
-	clientErrorGettingNodes = `Client error while retrieving node records. Client retrieved records
18
-during discovery, so this is likely to be a transient error. Try running
19
-diagnostics again. If this message persists, there may be a permissions
20
-problem with getting node records. The error was:
21
-
22
-(%T) %[1]v`
23
-
24
-	nodeNotReady = `Node {{.node}} is defined but is not marked as ready.
25
-Ready status is {{.status}} because "{{.reason}}"
26
-If the node is not intentionally disabled, check that the master can
27
-reach the node hostname for a health check and the node is checking in
28
-to the master with the same hostname.
29
-
30
-While in this state, pods should not be scheduled to deploy on the node,
31
-and any existing scheduled pods will be considered failed and removed.
32
-`
33
-)
34
-
35
-// NodeDefinitions
36
-type NodeDefinition struct {
37
-	KubeClient *kclient.Client
38
-
39
-	Log *log.Logger
40
-}
41
-
42
-func (d NodeDefinition) Description() string {
43
-	return "Check node records on master"
44
-}
45
-func (d NodeDefinition) CanRun() (bool, error) {
46
-	if d.KubeClient == nil {
47
-		// TODO make prettier?
48
-		return false, errors.New("must have kube client")
49
-	}
50
-	if _, err := d.KubeClient.Nodes().List(labels.LabelSelector{}, fields.Everything()); err != nil {
51
-		// TODO check for 403 to return: "Client does not have cluster-admin access and cannot see node records"
52
-
53
-		return false, diagnostic.NewDiagnosticError("clGetNodesFailed", fmt.Sprintf(clientErrorGettingNodes, err), err)
54
-	}
55
-
56
-	return true, nil
57
-}
58
-func (d NodeDefinition) Check() (bool, []log.Message, []error, []error) {
59
-	if _, err := d.CanRun(); err != nil {
60
-		return false, nil, nil, []error{err}
61
-	}
62
-
63
-	nodes, err := d.KubeClient.Nodes().List(labels.LabelSelector{}, fields.Everything())
64
-	if err != nil {
65
-		return false, nil, nil, []error{
66
-			diagnostic.NewDiagnosticError("clGetNodesFailed", fmt.Sprintf(clientErrorGettingNodes, err), err),
67
-		}
68
-	}
69
-
70
-	for _, node := range nodes.Items {
71
-		var ready *kapi.NodeCondition
72
-		for i, condition := range node.Status.Conditions {
73
-			switch condition.Type {
74
-			// currently only one... used to be more, may be again
75
-			case kapi.NodeReady:
76
-				ready = &node.Status.Conditions[i]
77
-				// TODO comment needed to explain why we do last one wins.  should this break instead?
78
-			}
79
-		}
80
-
81
-		if ready == nil || ready.Status != kapi.ConditionTrue {
82
-			// instead of building this, simply use the node object directly
83
-			templateData := map[string]interface{}{}
84
-			templateData["node"] = node.Name
85
-			if ready == nil {
86
-				templateData["status"] = "None"
87
-				templateData["reason"] = "There is no readiness record."
88
-			} else {
89
-				templateData["status"] = ready.Status
90
-				templateData["reason"] = ready.Reason
91
-			}
92
-
93
-			return false, nil, []error{
94
-				diagnostic.NewDiagnosticErrorFromTemplate("clNodeBroken", nodeNotReady, templateData),
95
-			}, nil
96
-		}
97
-	}
98
-
99
-	return true, nil, nil, nil
100
-}
101 1
new file mode 100644
... ...
@@ -0,0 +1,116 @@
0
+package cluster
1
+
2
+// The purpose of this diagnostic is to detect nodes that are out of commission
3
+// (which may affect the ability to schedule pods) for user awareness.
4
+
5
+import (
6
+	"errors"
7
+	"fmt"
8
+
9
+	kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
10
+	kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
11
+
12
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
13
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
14
+	"github.com/openshift/origin/pkg/diagnostics/log"
15
+	"github.com/openshift/origin/pkg/diagnostics/types"
16
+)
17
+
18
+const (
19
+	clientErrorGettingNodes = `Client error while retrieving node records. Client retrieved records
20
+during discovery, so this is likely to be a transient error. Try running
21
+diagnostics again. If this message persists, there may be a permissions
22
+problem with getting node records. The error was:
23
+
24
+(%T) %[1]v`
25
+
26
+	nodeNotReady = `Node {{.node}} is defined but is not marked as ready.
27
+Ready status is {{.status}} because "{{.reason}}"
28
+If the node is not intentionally disabled, check that the master can
29
+reach the node hostname for a health check and the node is checking in
30
+to the master with the same hostname.
31
+
32
+While in this state, pods should not be scheduled to deploy on the node,
33
+and any existing scheduled pods will be considered failed and removed.
34
+`
35
+
36
+	nodeNotSched = `Node {{.node}} is ready but is marked Unschedulable.
37
+This is usually set manually for administrative reasons.
38
+An administrator can mark the node schedulable with:
39
+    oadm manage-node {{.node}} --schedulable=true
40
+
41
+While in this state, pods should not be scheduled to deploy on the node.
42
+Existing pods will continue to run until completed or evacuated (see
43
+other options for 'oadm manage-node').
44
+`
45
+)
46
+
47
+// NodeDefinitions
48
+type NodeDefinitions struct {
49
+	KubeClient *kclient.Client
50
+}
51
+
52
+func (d NodeDefinitions) Name() string {
53
+	return "NodeDefinitions"
54
+}
55
+
56
+func (d NodeDefinitions) Description() string {
57
+	return "Check node records on master"
58
+}
59
+
60
+func (d NodeDefinitions) CanRun() (bool, error) {
61
+	if d.KubeClient == nil {
62
+		return false, errors.New("must have kube client")
63
+	}
64
+	if _, err := d.KubeClient.Nodes().List(labels.LabelSelector{}, fields.Everything()); err != nil {
65
+		// TODO check for 403 to return: "Client does not have cluster-admin access and cannot see node records"
66
+
67
+		msg := log.Message{ID: "clGetNodesFailed", EvaluatedText: fmt.Sprintf(clientErrorGettingNodes, err)}
68
+		return false, types.DiagnosticError{msg.ID, &msg, err}
69
+	}
70
+
71
+	return true, nil
72
+}
73
+
74
+func (d NodeDefinitions) Check() *types.DiagnosticResult {
75
+	r := types.NewDiagnosticResult("NodeDefinitions")
76
+
77
+	nodes, err := d.KubeClient.Nodes().List(labels.LabelSelector{}, fields.Everything())
78
+	if err != nil {
79
+		r.Errorf("clGetNodesFailed", err, clientErrorGettingNodes, err)
80
+		return r
81
+	}
82
+
83
+	anyNodesAvail := false
84
+	for _, node := range nodes.Items {
85
+		var ready *kapi.NodeCondition
86
+		for i, condition := range node.Status.Conditions {
87
+			switch condition.Type {
88
+			// Each condition appears only once. Currently there's only one... used to be more
89
+			case kapi.NodeReady:
90
+				ready = &node.Status.Conditions[i]
91
+			}
92
+		}
93
+
94
+		if ready == nil || ready.Status != kapi.ConditionTrue {
95
+			templateData := log.Hash{"node": node.Name}
96
+			if ready == nil {
97
+				templateData["status"] = "None"
98
+				templateData["reason"] = "There is no readiness record."
99
+			} else {
100
+				templateData["status"] = ready.Status
101
+				templateData["reason"] = ready.Reason
102
+			}
103
+			r.Warnt("clNodeNotReady", nil, nodeNotReady, templateData)
104
+		} else if node.Spec.Unschedulable {
105
+			r.Warnt("clNodeNotSched", nil, nodeNotSched, log.Hash{"node": node.Name})
106
+		} else {
107
+			anyNodesAvail = true
108
+		}
109
+	}
110
+	if !anyNodesAvail {
111
+		r.Error("clNoAvailNodes", nil, "There were no nodes available for OpenShift to use.")
112
+	}
113
+
114
+	return r
115
+}
0 116
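
The readiness logic above reduces to: find the Ready condition, warn when it is absent or not True, warn when the node is marked unschedulable, and error when no usable node remains. A self-contained sketch with stand-in types (the real kapi.Node carries far more fields):

    package main

    import "fmt"

    // Stand-ins for the relevant slices of the node API.
    type condition struct {
        Type   string
        Status string
        Reason string
    }

    type node struct {
        Name          string
        Unschedulable bool
        Conditions    []condition
    }

    // checkNodes reports per-node problems and whether any node is usable.
    func checkNodes(nodes []node) (available bool) {
        for _, n := range nodes {
            var ready *condition
            for i := range n.Conditions {
                if n.Conditions[i].Type == "Ready" {
                    ready = &n.Conditions[i]
                }
            }
            switch {
            case ready == nil:
                fmt.Printf("WARN: node %s has no readiness record\n", n.Name)
            case ready.Status != "True":
                fmt.Printf("WARN: node %s not ready: %s\n", n.Name, ready.Reason)
            case n.Unschedulable:
                fmt.Printf("WARN: node %s is ready but marked unschedulable\n", n.Name)
            default:
                available = true
            }
        }
        return available
    }

    func main() {
        nodes := []node{
            {Name: "node1", Conditions: []condition{{Type: "Ready", Status: "True"}}},
            {Name: "node2", Unschedulable: true, Conditions: []condition{{Type: "Ready", Status: "True"}}},
        }
        if !checkNodes(nodes) {
            fmt.Println("ERROR: no nodes available")
        }
    }
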
deleted file mode 100644
... ...
@@ -1,104 +0,0 @@
1
-package discovery // client
2
-
3
-import (
4
-	"fmt"
5
-	"github.com/openshift/origin/pkg/diagnostics/log"
6
-	"github.com/openshift/origin/pkg/diagnostics/types"
7
-	"os"
8
-	"os/exec"
9
-	"path/filepath"
10
-	"runtime"
11
-	"strings"
12
-)
13
-
14
-// ----------------------------------------------------------
15
-// Look for 'osc' and 'openshift' executables
16
-func (env *Environment) DiscoverClient() error {
17
-	var err error
18
-	f := env.Options.ClientDiagOptions.Factory
19
-	if config, err := f.OpenShiftClientConfig.RawConfig(); err != nil {
20
-		env.Log.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err)
21
-	} else {
22
-		env.OsConfig = &config
23
-		env.FactoryForContext[config.CurrentContext] = f
24
-	}
25
-	env.Log.Debug("discSearchExec", "Searching for executables in path:\n  "+strings.Join(filepath.SplitList(os.Getenv("PATH")), "\n  ")) //TODO for non-Linux OS
26
-	env.OscPath = env.findExecAndLog("osc")
27
-	if env.OscPath != "" {
28
-		env.OscVersion, err = getExecVersion(env.OscPath, env.Log)
29
-	}
30
-	env.OpenshiftPath = env.findExecAndLog("openshift")
31
-	if env.OpenshiftPath != "" {
32
-		env.OpenshiftVersion, err = getExecVersion(env.OpenshiftPath, env.Log)
33
-	}
34
-	if env.OpenshiftVersion.NonZero() && env.OscVersion.NonZero() && !env.OpenshiftVersion.Eq(env.OscVersion) {
35
-		env.Log.Warnm("discVersionMM", log.Msg{"osV": env.OpenshiftVersion.GoString(), "oscV": env.OscVersion.GoString(),
36
-			"text": fmt.Sprintf("'openshift' version %#v does not match 'osc' version %#v; update or remove the lower version", env.OpenshiftVersion, env.OscVersion)})
37
-	}
38
-	return err
39
-}
40
-
41
-// ----------------------------------------------------------
42
-// Look for a specific executable and log what happens
43
-func (env *Environment) findExecAndLog(cmd string) string {
44
-	if path := findExecFor(cmd); path != "" {
45
-		env.Log.Infom("discExecFound", log.Msg{"command": cmd, "path": path, "tmpl": "Found '{{.command}}' at {{.path}}"})
46
-		return path
47
-	} else {
48
-		env.Log.Warnm("discExecNoPath", log.Msg{"command": cmd, "tmpl": "No '{{.command}}' executable was found in your path"})
49
-	}
50
-	return ""
51
-}
52
-
53
-// ----------------------------------------------------------
54
-// Look in the path for an executable
55
-func findExecFor(cmd string) string {
56
-	path, err := exec.LookPath(cmd)
57
-	if err == nil {
58
-		return path
59
-	}
60
-	if runtime.GOOS == "windows" {
61
-		path, err = exec.LookPath(cmd + ".exe")
62
-		if err == nil {
63
-			return path
64
-		}
65
-	}
66
-	return ""
67
-}
68
-
69
-// ----------------------------------------------------------
70
-// Invoke executable's "version" command to determine version
71
-func getExecVersion(path string, logger *log.Logger) (version types.Version, err error) {
72
-	cmd := exec.Command(path, "version")
73
-	var out []byte
74
-	out, err = cmd.CombinedOutput()
75
-	if err == nil {
76
-		var name string
77
-		var x, y, z int
78
-		if scanned, err := fmt.Sscanf(string(out), "%s v%d.%d.%d", &name, &x, &y, &z); scanned > 1 {
79
-			version = types.Version{x, y, z}
80
-			logger.Infom("discVersion", log.Msg{"tmpl": "version of {{.command}} is {{.version}}", "command": name, "version": version.GoString()})
81
-		} else {
82
-			logger.Errorf("discVersErr", `
83
-Expected version output from '%s version'
84
-Could not parse output received:
85
-%v
86
-Error was: %#v`, path, string(out), err)
87
-		}
88
-	} else {
89
-		switch err.(type) {
90
-		case *exec.Error:
91
-			logger.Errorf("discVersErr", "error in executing '%v version': %v", path, err)
92
-		case *exec.ExitError:
93
-			logger.Errorf("discVersErr", `
94
-Executed '%v version' which exited with an error code.
95
-This version is likely old or broken.
96
-Error was '%v';
97
-Output was:
98
-%v`, path, err.Error(), log.LimitLines(string(out), 5))
99
-		default:
100
-			logger.Errorf("discVersErr", "executed '%v version' but an error occurred:\n%v\nOutput was:\n%v", path, err, string(out))
101
-		}
102
-	}
103
-	return version, err
104
-}
105 1
deleted file mode 100644
... ...
@@ -1,378 +0,0 @@
1
-package discovery // config
2
-
3
-import (
4
-	"fmt"
5
-	kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
6
-	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd"
7
-	clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
8
-	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
9
-	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
10
-	"github.com/openshift/origin/pkg/cmd/cli/config"
11
-	"github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
12
-	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
13
-	"github.com/openshift/origin/pkg/diagnostics/log"
14
-	"io/ioutil"
15
-	"os"
16
-	"regexp"
17
-	"strings"
18
-)
19
-
20
-/* ----------------------------------------------------------
21
-Look for the client config and try to read it.
22
-
23
-We will look in the standard locations, alert the user to what we find
24
-as we go along, and try to be helpful.
25
-*/
26
-
27
-// -------------------------------------------------------------
28
-// Look for client config file in a number of possible locations
29
-func (env *Environment) ReadClientConfigFiles() {
30
-	confFlagName := options.FlagAllClientConfigName
31
-	confFlag := env.Options.ClientConfigPath // from openshift-diagnostics --client-config
32
-	if flags := env.Options.GlobalFlags; flags != nil {
33
-		name := config.OpenShiftConfigFlagName
34
-		if flag := env.Options.GlobalFlags.Lookup(name); flag != nil {
35
-			confFlag = flag.Value.String() // from openshift-diagnostics client --config
36
-			confFlagName = name
37
-		}
38
-	}
39
-	var found bool
40
-	rules := config.NewOpenShiftClientConfigLoadingRules()
41
-	paths := append([]string{confFlag}, rules.Precedence...)
42
-	for index, path := range paths {
43
-		errmsg := ""
44
-		switch index {
45
-		case 0:
46
-			errmsg = fmt.Sprintf("--"+confFlagName+" specified that client config should be at %s\n", path)
47
-		case len(paths) - 1:
48
-			// do nothing, the config wasn't found in ~
49
-		default:
50
-			if len(os.Getenv(config.OpenShiftConfigPathEnvVar)) != 0 {
51
-				errmsg = fmt.Sprintf("$OPENSHIFTCONFIG specified that client config should be at %s\n", path)
52
-			}
53
-		}
54
-
55
-		if rawConfig := openConfigFile(path, errmsg, env.Log); rawConfig != nil && !found {
56
-			found = true
57
-			env.ClientConfigPath = path
58
-			env.ClientConfigRaw = rawConfig
59
-		}
60
-	}
61
-	if found {
62
-		if confFlag != "" && confFlag != env.ClientConfigPath {
63
-			// found config but not where --config said, so don't continue discovery
64
-			env.Log.Errorf("discCCnotFlag", `
65
-The client configuration file was not found where the --%s flag indicated:
66
-  %s
67
-A config file was found at the following location:
68
-  %s
69
-If you wish to use this file for client configuration, you can specify it
70
-with the --%[1]s flag, or just not specify the flag.
71
-			`, confFlagName, confFlag, env.ClientConfigPath)
72
-		} else {
73
-			// happy path, client config found as expected
74
-			env.WillCheck[ClientTarget] = true
75
-		}
76
-	} else { // not found, decide what to do
77
-		if confFlag != "" { // user expected conf file at specific place
78
-			env.Log.Errorf("discNoCC", "The client configuration file was not found where --%s='%s' indicated.", confFlagName, confFlag)
79
-		} else if !env.Options.ClientDiagOptions.MustCheck {
80
-			env.Log.Notice("discSkipCLI", "No client config file found; client diagnostics will not be performed.")
81
-		} else {
82
-			// user specifically wants to troubleshoot client, but no conf file given
83
-			env.Log.Warn("discNoCCfile", "No client config file read; OpenShift client diagnostics will use flags and default configuration.")
84
-			env.WillCheck[ClientTarget] = true
85
-			adminPaths := []string{
86
-				"/etc/openshift/master/admin.kubeconfig",           // enterprise
87
-				"/openshift.local.config/master/admin.kubeconfig",  // origin systemd
88
-				"./openshift.local.config/master/admin.kubeconfig", // origin binary
89
-			}
90
-			adminWarningF := `
91
-No client config file was available; however, one exists at
92
-  %[1]s
93
-which is a standard location where the master generates it.
94
-If this is what you want to use, you should copy it to a standard location
95
-(~/.config/openshift/.config, or the current directory), or you can set the
96
-environment variable OPENSHIFTCONFIG in your ~/.bash_profile:
97
-  export OPENSHIFTCONFIG=%[1]s
98
-If this is not what you want, you should obtain a config file and
99
-place it in a standard location.
100
-`
101
-			// look for it in auto-generated locations when not found properly
102
-			for _, path := range adminPaths {
103
-				if conf := openConfigFile(path, "", env.Log); conf != nil {
104
-					env.Log.Warnf("discCCautoPath", adminWarningF, path)
105
-					break
106
-				}
107
-			}
108
-		}
109
-	}
110
-}
111
-
112
-// ----------------------------------------------------------
113
-// Attempt to open file at path as client config
114
-// If there is a problem and errmsg is set, log an error
115
-func openConfigFile(path string, errmsg string, logger *log.Logger) *clientcmdapi.Config {
116
-	var err error
117
-	var file *os.File
118
-	if path == "" { // empty param/envvar
119
-		return nil
120
-	} else if file, err = os.Open(path); err == nil {
121
-		logger.Debugm("discOpenCC", log.Msg{"tmpl": "Reading client config at {{.path}}", "path": path})
122
-	} else if errmsg == "" {
123
-		logger.Debugf("discOpenCCNo", "Could not read client config at %s:\n%#v", path, err)
124
-	} else if os.IsNotExist(err) {
125
-		logger.Error("discOpenCCNoExist", errmsg+"but that file does not exist.")
126
-	} else if os.IsPermission(err) {
127
-		logger.Error("discOpenCCNoPerm", errmsg+"but lack permission to read that file.")
128
-	} else {
129
-		logger.Errorf("discOpenCCErr", "%sbut there was an error opening it:\n%#v", errmsg, err)
130
-	}
131
-	if file != nil { // it is open for reading
132
-		defer file.Close()
133
-		if buffer, err := ioutil.ReadAll(file); err != nil {
134
-			logger.Errorf("discCCReadErr", "Unexpected error while reading client config file (%s): %v", path, err)
135
-		} else if conf, err := clientcmd.Load(buffer); err != nil {
136
-			logger.Errorf("discCCYamlErr", `
137
-Error reading YAML from client config file (%s):
138
-  %v
139
-This file may have been truncated or mis-edited.
140
-Please fix, remove, or obtain a new client config`, file.Name(), err)
141
-		} else {
142
-			logger.Infom("discCCRead", log.Msg{"tmpl": `Successfully read a client config file at '{{.path}}'`, "path": path})
143
-			/* Note, we're not going to use this config file directly.
144
-			 * Instead, we'll defer to the openshift client code to assimilate
145
-			 * flags, env vars, and the potential hierarchy of config files
146
-			 * into an actual configuration that the client uses.
147
-			 * However, for diagnostic purposes, record the first we find.
148
-			 */
149
-			return conf
150
-		}
151
-	}
152
-	return nil
153
-}
154
-
155
-/* The full client configuration may specify multiple contexts, each
156
- * of which could be a different server, a different user, a different
157
- * default project. We want to check which contexts have useful access,
158
- * and record those. At this point, we should already have the factory
159
- * for the current context. Factories embed config and a client cache,
160
- * and since we want to do discovery for every available context, we are
161
- * going to create a factory for each context. We will determine which
162
- * context actually has access to the default project, preferring the
163
- * current (default) context if it does. Connection errors should be
164
- * diagnosed along the way.
165
- */
166
-func (env *Environment) ConfigClient() {
167
-	if env.OsConfig != nil {
168
-		// TODO: run these in parallel, with a time limit so connection timeouts don't take forever
169
-		for cname, context := range env.OsConfig.Contexts {
170
-			// set context, create factory, see what's available
171
-			if env.FactoryForContext[cname] == nil {
172
-				//config := clientcmd.NewNonInteractiveClientConfig(env.Factory.OpenShiftClientConfig, cname, &clientcmd.ConfigOverrides{})
173
-				config := clientcmd.NewNonInteractiveClientConfig(*env.OsConfig, cname, &clientcmd.ConfigOverrides{})
174
-				f := osclientcmd.NewFactory(config)
175
-				//f.BindFlags(env.Flags.OpenshiftFlags)
176
-				env.FactoryForContext[cname] = f
177
-			}
178
-			if access := getContextAccess(env.FactoryForContext[cname], cname, context, env.Log); access != nil {
179
-				env.AccessForContext[cname] = access
180
-				if access.ClusterAdmin && (cname == env.OsConfig.CurrentContext || env.ClusterAdminFactory == nil) {
181
-					env.ClusterAdminFactory = env.FactoryForContext[cname]
182
-				}
183
-			}
184
-		}
185
-	}
186
-}
187
-
188
-// for now, only try to determine what namespaces a user can see
189
-func getContextAccess(factory *osclientcmd.Factory, ctxName string, ctx clientcmdapi.Context, logger *log.Logger) *ContextAccess {
190
-	// start by getting ready to log the result
191
-	msgText := "Testing client config context {{.context}}\nServer: {{.server}}\nUser: {{.user}}\n\n"
192
-	msg := log.Msg{"id": "discCCctx", "tmpl": msgText}
193
-	if config, err := factory.OpenShiftClientConfig.RawConfig(); err != nil {
194
-		logger.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err)
195
-		return nil
196
-	} else {
197
-		msg["context"] = ctxName
198
-		msg["server"] = config.Clusters[ctx.Cluster].Server
199
-		msg["user"] = ctx.AuthInfo
200
-	}
201
-	// actually go and request project list from the server
202
-	if osclient, _, err := factory.Clients(); err != nil {
203
-		logger.Errorf("discCCctxClients", "Failed to create client during discovery with error:\n(%T) %[1]v\nThis is probably an OpenShift bug.", err)
204
-		return nil
205
-	} else if projects, err := osclient.Projects().List(labels.Everything(), fields.Everything()); err == nil { // success!
206
-		list := projects.Items
207
-		if len(list) == 0 {
208
-			msg["tmpl"] = msgText + "Successfully requested project list, but it is empty, so user has no access to anything."
209
-			msg["projects"] = make([]string, 0)
210
-			logger.Infom("discCCctxSuccess", msg)
211
-			return nil
212
-		}
213
-		access := &ContextAccess{Projects: make([]string, len(list))}
214
-		for i, project := range list {
215
-			access.Projects[i] = project.Name
216
-			if project.Name == kapi.NamespaceDefault {
217
-				access.ClusterAdmin = true
218
-			}
219
-		}
220
-		if access.ClusterAdmin {
221
-			msg["tmpl"] = msgText + "Successfully requested project list; has access to default project, so assumed to be a cluster-admin"
222
-			logger.Infom("discCCctxSuccess", msg)
223
-		} else {
224
-			msg["tmpl"] = msgText + "Successfully requested project list; has access to project(s): {{.projectStr}}"
225
-			msg["projects"] = access.Projects
226
-			msg["projectStr"] = strings.Join(access.Projects, ", ")
227
-			logger.Infom("discCCctxSuccess", msg)
228
-		}
229
-		return access
230
-	} else { // something went wrong, so diagnose it
231
-		noResolveRx := regexp.MustCompile("dial tcp: lookup (\\S+): no such host")
232
-		unknownCaMsg := "x509: certificate signed by unknown authority"
233
-		unneededCaMsg := "specifying a root certificates file with the insecure flag is not allowed"
234
-		invalidCertNameRx := regexp.MustCompile("x509: certificate is valid for (\\S+, )+not (\\S+)")
235
-		connRefusedRx := regexp.MustCompile("dial tcp (\\S+): connection refused")
236
-		connTimeoutRx := regexp.MustCompile("dial tcp (\\S+): (?:connection timed out|i/o timeout)")
237
-		unauthenticatedMsg := `403 Forbidden: Forbidden: "/osapi/v1beta1/projects?namespace=" denied by default`
238
-		unauthorizedRx := regexp.MustCompile("401 Unauthorized: Unauthorized$")
239
-
240
-		malformedHTTPMsg := "malformed HTTP response"
241
-		malformedTLSMsg := "tls: oversized record received with length"
242
-
243
-		// interpret the error message for mere mortals
244
-		errm := err.Error()
245
-		var reason, errId string
246
-		switch {
247
-		case noResolveRx.MatchString(errm):
248
-			errId, reason = "clientNoResolve", `
249
-This usually means that the hostname does not resolve to an IP.
250
-Hostnames should usually be resolved via DNS or an /etc/hosts file.
251
-Ensure that the hostname resolves correctly from your host before proceeding.
252
-Of course, your config could also simply have the wrong hostname specified.
253
-`
254
-		case strings.Contains(errm, unknownCaMsg):
255
-			errId, reason = "clientUnknownCa", `
256
-This means that we cannot validate the certificate in use by the
257
-OpenShift API server, so we cannot securely communicate with it.
258
-Connections could be intercepted and your credentials stolen.
259
-
260
-Since the server certificate we see when connecting is not validated
261
-by public certificate authorities (CAs), you probably need to specify a
262
-certificate from a private CA to validate the connection.
263
-
264
-Your config may be specifying the wrong CA cert, or none, or there
265
-could actually be a man-in-the-middle attempting to intercept your
266
-connection.  If you are unconcerned about any of this, you can add the
---insecure-skip-tls-verify flag to bypass secure (TLS) verification,
267
-but this is risky and should not be necessary.
268
-** Connections could be intercepted and your credentials stolen. **
269
-`
270
-		case strings.Contains(errm, unneededCaMsg):
271
-			errId, reason = "clientUnneededCa", `
272
-This means that for client connections to the OpenShift API server, you
273
-(or your kubeconfig) specified both a validating certificate authority
274
-and that the client should bypass connection security validation.
275
-
276
-This is not allowed because it is likely to be a mistake.
277
-
278
-If you want to use --insecure-skip-tls-verify to bypass security (which
279
-is usually a bad idea anyway), then you need to also clear the CA cert
280
-from your command line options or kubeconfig file(s). Of course, it
281
-would be far better to obtain and use a correct CA cert.
282
-`
283
-		case invalidCertNameRx.MatchString(errm):
284
-			match := invalidCertNameRx.FindStringSubmatch(errm)
285
-			serverHost := match[len(match)-1]
286
-			errId, reason = "clientInvCertName", fmt.Sprintf(`
287
-This means that the certificate in use by the OpenShift API server
288
-(master) does not match the hostname by which you are addressing it:
289
-  %s
290
-so a secure connection is not allowed. In theory, this *could* mean that
291
-someone is intercepting your connection and presenting a certificate
292
-that is valid but for a different server, which is why secure validation
293
-fails in this case.
294
-
295
-However, the most likely explanation is that the server certificate
296
-needs to be updated to include the name you are using to reach it.
297
-
298
-If the OpenShift server is generating its own certificates (which
299
-is default), then the --public-master flag on the OpenShift master is
300
-usually the easiest way to do this. If you need something more complicated
301
-(for instance, multiple public addresses for the API, or your own CA),
302
-then you will need to custom-generate the server certificate with the
303
-right names yourself.
304
-
305
-If you are unconcerned about any of this, you can add the
---insecure-skip-tls-verify flag to bypass secure (TLS) verification,
306
-but this is risky and should not be necessary.
307
-** Connections could be intercepted and your credentials stolen. **
308
-`, serverHost)
309
-		case connRefusedRx.MatchString(errm):
310
-			errId, reason = "clientInvCertName", `
311
-This means that when we tried to connect to the OpenShift API
312
-server (master), we reached the host, but nothing accepted the port
313
-connection. This could mean that the OpenShift master is stopped, or
314
-that a firewall or security policy is blocking access at that port.
315
-
316
-You will not be able to connect or do anything at all with OpenShift
317
-until this server problem is resolved or you specify a corrected
318
-server address.`
319
-		case connTimeoutRx.MatchString(errm):
320
-			errId, reason = "clientConnTimeout", `
321
-This means that when we tried to connect to the OpenShift API server
322
-(master), we could not reach the host at all.
323
-* You may have specified the wrong host address.
324
-* This could mean the host is completely unavailable (down).
325
-* This could indicate a routing problem or a firewall that simply
326
-  drops requests rather than responding by reseting the connection.
327
-* It does not generally mean that DNS name resolution failed (which
328
-  would be a different error) though the problem could be that it
329
-  gave the wrong address.`
330
-		case strings.Contains(errm, malformedHTTPMsg):
331
-			errId, reason = "clientMalformedHTTP", `
332
-This means that when we tried to connect to the OpenShift API server
333
-(master) with a plain HTTP connection, the server did not speak
334
-HTTP back to us. The most common explanation is that a secure server
335
-is listening but you specified an http: connection instead of https:.
336
-There could also be another service listening at the intended port
337
-speaking some other protocol entirely.
338
-
339
-You will not be able to connect or do anything at all with OpenShift
340
-until this server problem is resolved or you specify a corrected
341
-server address.`
342
-		case strings.Contains(errm, malformedTLSMsg):
343
-			errId, reason = "clientMalformedTLS", `
344
-This means that when we tried to connect to the OpenShift API server
345
-(master) with a secure HTTPS connection, the server did not speak
346
-HTTPS back to us. The most common explanation is that the server
347
-listening at that port is not the secure server you expected - it
348
-may be a non-secure HTTP server or the wrong service may be
349
-listening there, or you may have specified an incorrect port.
350
-
351
-You will not be able to connect or do anything at all with OpenShift
352
-until this server problem is resolved or you specify a corrected
353
-server address.`
354
-		case strings.Contains(errm, unauthenticatedMsg):
355
-			errId, reason = "clientUnauthn", `
356
-This means that when we tried to make a request to the OpenShift API
357
-server, your kubeconfig did not present valid credentials to
358
-authenticate your client. Credentials generally consist of a client
359
-key/certificate or an access token. Your kubeconfig may not have
360
-presented any, or they may be invalid.`
361
-		case unauthorizedRx.MatchString(errm):
362
-			errId, reason = "clientUnauthz", `
363
-This means that when we tried to make a request to the OpenShift API
364
-server, the request required credentials that were not presented.
365
-This can happen when an authentication token expires. Try logging in
366
-with this user again.`
367
-		default:
368
-			errId, reason = "clientUnknownConnErr", `Diagnostics does not have an explanation for what this means. Please report this error so one can be added.`
369
-		}
370
-		errMsg := fmt.Sprintf("(%T) %[1]v", err)
371
-		msg["tmpl"] = msgText + errMsg + reason
372
-		msg["errMsg"] = errMsg
373
-		logger.Errorm(errId, msg)
374
-	}
375
-	return nil
376
-}
377 1
deleted file mode 100644
... ...
@@ -1,19 +0,0 @@
1
-package discovery
2
-
3
-import (
4
-	"os/exec"
5
-	"runtime"
6
-)
7
-
8
-// ----------------------------------------------------------
9
-// Determine what we need to about the OS
10
-func (env *Environment) DiscoverOperatingSystem() {
11
-	if runtime.GOOS == "linux" {
12
-		if _, err := exec.LookPath("systemctl"); err == nil {
13
-			env.HasSystemd = true
14
-		}
15
-		if _, err := exec.LookPath("/bin/bash"); err == nil {
16
-			env.HasBash = true
17
-		}
18
-	}
19
-}
20 1
new file mode 100644
... ...
@@ -0,0 +1,46 @@
0
+package host
1
+
2
+import (
3
+	"errors"
4
+
5
+	configapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
6
+	configvalidation "github.com/openshift/origin/pkg/cmd/server/api/validation"
7
+	"github.com/openshift/origin/pkg/diagnostics/types"
8
+)
9
+
10
+// MasterConfigCheck
11
+type MasterConfigCheck struct {
12
+	MasterConfigFile string
13
+}
14
+
15
+func (d MasterConfigCheck) Name() string {
16
+	return "MasterConfigCheck"
17
+}
18
+
19
+func (d MasterConfigCheck) Description() string {
20
+	return "Check the master config file"
21
+}
22
+func (d MasterConfigCheck) CanRun() (bool, error) {
23
+	if len(d.MasterConfigFile) == 0 {
24
+		return false, errors.New("must have master config file")
25
+	}
26
+
27
+	return true, nil
28
+}
29
+func (d MasterConfigCheck) Check() *types.DiagnosticResult {
30
+	r := types.NewDiagnosticResult("MasterConfigCheck")
31
+
32
+	r.Debugf("discMCfile", "Looking for master config file at '%s'", d.MasterConfigFile)
33
+	masterConfig, err := configapilatest.ReadAndResolveMasterConfig(d.MasterConfigFile)
34
+	if err != nil {
35
+		r.Errorf("discMCfail", err, "Could not read master config file '%s':\n(%T) %[2]v", d.MasterConfigFile, err)
36
+		return r
37
+	}
38
+
39
+	r.Infof("discMCfound", "Found a master config file: %[1]s", d.MasterConfigFile)
40
+
41
+	for _, err := range configvalidation.ValidateMasterConfig(masterConfig).Errors {
42
+		r.Errorf("discMCinvalid", err, "Validation of master config file '%s' failed:\n(%T) %[2]v", d.MasterConfigFile, err)
43
+	}
44
+	return r
45
+}
0 46
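
MasterConfigCheck, and NodeConfigCheck just below, share one shape: read the file, return a single error result if it is unreadable, and otherwise run every validator, reporting each failure rather than stopping at the first. A minimal sketch; the validator and the path in main are hypothetical:

    package main

    import (
        "fmt"
        "os"
    )

    // A validator inspects raw config bytes and returns zero or more problems.
    type validator func(data []byte) []error

    // checkConfigFile reads a config file and accumulates validation errors
    // instead of stopping at the first one.
    func checkConfigFile(path string, validators []validator) []error {
        data, err := os.ReadFile(path)
        if err != nil {
            return []error{fmt.Errorf("could not read config file %q: (%T) %v", path, err, err)}
        }
        var problems []error
        for _, v := range validators {
            problems = append(problems, v(data)...)
        }
        return problems
    }

    func main() {
        notEmpty := func(data []byte) []error {
            if len(data) == 0 {
                return []error{fmt.Errorf("config file is empty")}
            }
            return nil
        }
        // Hypothetical path, for illustration only.
        for _, err := range checkConfigFile("/etc/openshift/master/master-config.yaml",
            []validator{notEmpty}) {
            fmt.Println("ERROR:", err)
        }
    }
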
new file mode 100644
... ...
@@ -0,0 +1,45 @@
0
+package host
1
+
2
+import (
3
+	"errors"
4
+
5
+	configapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
6
+	configvalidation "github.com/openshift/origin/pkg/cmd/server/api/validation"
7
+	"github.com/openshift/origin/pkg/diagnostics/types"
8
+)
9
+
10
+// NodeConfigCheck
11
+type NodeConfigCheck struct {
12
+	NodeConfigFile string
13
+}
14
+
15
+func (d NodeConfigCheck) Name() string {
16
+	return "NodeConfigCheck"
17
+}
18
+
19
+func (d NodeConfigCheck) Description() string {
20
+	return "Check the node config file"
21
+}
22
+func (d NodeConfigCheck) CanRun() (bool, error) {
23
+	if len(d.NodeConfigFile) == 0 {
24
+		return false, errors.New("must have node config file")
25
+	}
26
+
27
+	return true, nil
28
+}
29
+func (d NodeConfigCheck) Check() *types.DiagnosticResult {
30
+	r := types.NewDiagnosticResult("NodeConfigCheck")
31
+	r.Debugf("discNCfile", "Looking for node config file at '%s'", d.NodeConfigFile)
32
+	nodeConfig, err := configapilatest.ReadAndResolveNodeConfig(d.NodeConfigFile)
33
+	if err != nil {
34
+		r.Errorf("discNCfail", err, "Could not read node config file '%s':\n(%T) %[2]v", d.NodeConfigFile, err)
35
+		return r
36
+	}
37
+
38
+	r.Infof("discNCfound", "Found a node config file: %[1]s", d.NodeConfigFile)
39
+
40
+	for _, err := range configvalidation.ValidateNodeConfig(nodeConfig) {
41
+		r.Errorf("discNCinvalid", err, "Validation of node config file '%s' failed:\n(%T) %[2]v", d.NodeConfigFile, err)
42
+	}
43
+	return r
44
+}
... ...
@@ -12,7 +12,7 @@ type jsonLogger struct {
12 12
 	logFinished bool
13 13
 }
14 14
 
15
-func (j *jsonLogger) Write(entry LogEntry) {
15
+func (j *jsonLogger) Write(entry Entry) {
16 16
 	if j.logStarted {
17 17
 		fmt.Fprintln(j.out, ",")
18 18
 	} else {
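
The logStarted bookkeeping exists because entries are streamed as elements of a single JSON array: the first Write opens the bracket, each later Write is preceded by a comma, and Finish closes the array. A compact standalone version of the same idea:

    package main

    import (
        "encoding/json"
        "fmt"
        "io"
        "os"
    )

    // arrayLogger streams values as elements of one JSON array, so entries
    // never need to be buffered in memory all at once.
    type arrayLogger struct {
        out     io.Writer
        started bool
    }

    func (l *arrayLogger) Write(v interface{}) error {
        if l.started {
            fmt.Fprintln(l.out, ",")
        } else {
            fmt.Fprintln(l.out, "[")
            l.started = true
        }
        b, err := json.Marshal(v)
        if err != nil {
            return err
        }
        _, err = l.out.Write(b)
        return err
    }

    func (l *arrayLogger) Finish() {
        if l.started {
            fmt.Fprintln(l.out, "\n]")
        }
    }

    func main() {
        l := &arrayLogger{out: os.Stdout}
        l.Write(map[string]string{"id": "sumWarn", "text": "Warnings seen: 2"})
        l.Write(map[string]string{"id": "sumErr", "text": "Errors seen: 1"})
        l.Finish() // the output is one valid JSON array
    }
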
... ...
@@ -7,8 +7,11 @@ import (
7 7
 	ct "github.com/daviddengcn/go-colortext"
8 8
 	"io"
9 9
 	"io/ioutil"
10
+	"runtime"
10 11
 	"strings"
11 12
 	"text/template"
13
+
14
+	"github.com/openshift/origin/pkg/version"
12 15
 )
13 16
 
14 17
 type LoggerOptions struct {
... ...
@@ -35,6 +38,14 @@ type Level struct {
35 35
 	Bright bool
36 36
 }
37 37
 
38
+func (l Level) MarshalJSON() ([]byte, error) {
39
+	return []byte(`"` + l.Name + `"`), nil
40
+}
41
+
42
+func (l Level) MarshalYAML() (interface{}, error) {
43
+	return l.Name, nil
44
+}
45
+
38 46
 type Logger struct {
39 47
 	loggerType
40 48
 	level        Level
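
These marshalers make a Level serialize as its bare name, so a JSON log entry reads "level":"warn" rather than a nested object. A runnable check of the JSON side follows; the color fields of the real Level are omitted, and the YAML method is assumed to follow go-yaml's Marshaler convention:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    type Level struct {
        Level  int
        Name   string
        Prefix string
    }

    // MarshalJSON emits the level as a bare string, e.g. "warn", instead
    // of the default {"Level":3,"Name":"warn","Prefix":"WARN:  "} object.
    func (l Level) MarshalJSON() ([]byte, error) {
        return []byte(`"` + l.Name + `"`), nil
    }

    func main() {
        entry := struct {
            ID    string `json:"id"`
            Level Level  `json:"level"`
        }{ID: "sumWarn", Level: Level{3, "warn", "WARN:  "}}

        b, _ := json.Marshal(entry)
        fmt.Println(string(b)) // {"id":"sumWarn","level":"warn"}
    }
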
... ...
@@ -44,7 +55,7 @@ type Logger struct {
44 44
 
45 45
 // Internal type to deal with different log formats
46 46
 type loggerType interface {
47
-	Write(LogEntry)
47
+	Write(Entry)
48 48
 	Finish()
49 49
 }
50 50
 
... ...
@@ -85,18 +96,23 @@ func NewLogger(setLevel int, setFormat string, out io.Writer) (*Logger, error) {
85 85
 }
86 86
 
87 87
 type Message struct {
88
-	ID       string
89
-	Template string
90
-
88
+	// ID: an identifier unique to the message being logged, intended for json/yaml output
89
+	//     so that automation can recognize specific messages without trying to parse them.
90
+	ID string `json:"-" yaml:"-"`
91
+	// Template: a template string as understood by text/template that can use any of the
92
+	//           TemplateData entries in this Message as inputs.
93
+	Template string `json:"-" yaml:"-"`
91 94
 	// TemplateData is passed to template executor to complete the message
92
-	TemplateData interface{}
95
+	TemplateData interface{} `json:"data,omitempty" yaml:"data,omitempty"`
93 96
 
94
-	EvaluatedText string
97
+	EvaluatedText string `json:"text" yaml:"text"` // human-readable message text
95 98
 }
96 99
 
100
+type Hash map[string]interface{} // convenience/cosmetic type
101
+
97 102
 func (m Message) String() string {
98 103
 	if len(m.EvaluatedText) > 0 {
99
-		return fmt.Sprintf("%s: %s", m.EvaluatedText)
104
+		return m.EvaluatedText
100 105
 	}
101 106
 
102 107
 	if len(m.Template) == 0 {
... ...
@@ -105,7 +121,7 @@ func (m Message) String() string {
105 105
 
106 106
 	// if given a template, convert it to text
107 107
 	parsedTmpl, err := template.New(m.ID).Parse(m.Template)
108
-	if err != nil {
108
+	if err != nil { // unless the template is broken of course
109 109
 		return fmt.Sprintf("%s: %s %#v: %v", m.ID, m.Template, m.TemplateData, err)
110 110
 	}
111 111
 
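
The template handling above parses the message's Template and, on failure, falls back to a debug dump of ID, template, and data. The execute half is outside this hunk but follows the standard text/template pattern; here is a standalone sketch of the whole render path:

    package main

    import (
        "bytes"
        "fmt"
        "text/template"
    )

    type Hash map[string]interface{}

    // render evaluates a message template against its data, falling back
    // to a debug dump if the template cannot be parsed or executed.
    func render(id, tmpl string, data Hash) string {
        parsed, err := template.New(id).Parse(tmpl)
        if err != nil {
            return fmt.Sprintf("%s: %s %#v: %v", id, tmpl, data, err)
        }
        var buf bytes.Buffer
        if err := parsed.Execute(&buf, data); err != nil {
            return fmt.Sprintf("%s: %s %#v: %v", id, tmpl, data, err)
        }
        return buf.String()
    }

    func main() {
        fmt.Println(render("clNodeNotSched",
            "Node {{.node}} is ready but is marked Unschedulable.",
            Hash{"node": "node1"}))
        // Output: Node node1 is ready but is marked Unschedulable.
    }
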
... ...
@@ -118,23 +134,13 @@ func (m Message) String() string {
118 118
 	return buff.String()
119 119
 }
120 120
 
121
-type LogEntry struct {
122
-	Level Level
123
-	Message
121
+type Entry struct {
122
+	ID      string `json:"id"`
123
+	Origin  string `json:"origin"`
124
+	Level   Level  `json:"level"`
125
+	Message `yaml:"-,inline"`
124 126
 }
125 127
 
126
-/* a Msg can be expected to have the following entries:
127
- * "id": an identifier unique to the message being logged, intended for json/yaml output
128
- *       so that automation can recognize specific messages without trying to parse them.
129
- * "text": human-readable message text
130
- * "tmpl": a template string as understood by text/template that can use any of the other
131
- *         entries in this Msg as inputs. This is removed, evaluated, and the result is
132
- *         placed in "text". If there is an error during evaluation, the error is placed
133
- *         in "templateErr", the original id of the message is stored in "templateId",
134
- *         and the Msg id is changed to "tmplErr". Of course, this should never happen
135
- *         if there are no mistakes in the calling code.
136
- */
137
-
138 128
 var (
139 129
 	ErrorLevel  = Level{4, "error", "ERROR: ", ct.Red, true}   // Something is definitely wrong
140 130
 	WarnLevel   = Level{3, "warn", "WARN:  ", ct.Yellow, true} // Likely to be an issue but maybe not
... ...
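Given the struct tags above and Level's MarshalJSON, an Entry flattens cleanly for machine consumption: the embedded Message contributes only "text" (plus "data" when template data is present), and the level serializes as its name. A sketch of the resulting JSON, assuming encoding/json (field values illustrative):

    entry := log.Entry{
    	ID:      "sdUnitInactive",
    	Origin:  "diagnostic UnitStatus",
    	Level:   log.WarnLevel,
    	Message: log.Message{EvaluatedText: "unit is enabled but not active"},
    }
    b, _ := json.Marshal(entry)
    fmt.Println(string(b))
    // {"id":"sdUnitInactive","origin":"diagnostic UnitStatus","level":"warn","text":"unit is enabled but not active"}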
@@ -144,136 +150,126 @@ var (
144 144
 )
145 145
 
146 146
 // Provide a summary at the end
147
-func (l *Logger) Summary() {
148
-	l.Notice("summary", "\nSummary of diagnostics execution:\n")
149
-	if l.warningsSeen > 0 {
150
-		l.Noticef("sumWarn", "Warnings seen: %d", l.warningsSeen)
147
+func (l *Logger) Summary(warningsSeen int, errorsSeen int) {
148
+	l.Noticef("summary", "\nSummary of diagnostics execution (version %v):\n", version.Get())
149
+	if warningsSeen > 0 {
150
+		l.Noticet("sumWarn", "Warnings seen: {{.warnings}}", Hash{"warnings": warningsSeen})
151 151
 	}
152
-	if l.errorsSeen > 0 {
153
-		l.Noticef("sumErr", "Errors seen: %d", l.errorsSeen)
152
+	if errorsSeen > 0 {
153
+		l.Noticet("sumErr", "Errors seen: {{.errors}}", Hash{"errors": errorsSeen})
154 154
 	}
155
-	if l.warningsSeen == 0 && l.errorsSeen == 0 {
155
+	if warningsSeen == 0 && errorsSeen == 0 {
156 156
 		l.Notice("sumNone", "Completed with no errors or warnings seen.")
157 157
 	}
158 158
 }
159 159
 
160
-func (l *Logger) LogMessage(level Level, message Message) {
161
-	// if there's no logger, return silently
162
-	if l == nil {
160
+func (l *Logger) LogEntry(entry Entry) {
161
+	if l == nil { // if there's no logger, return silently
163 162
 		return
164 163
 	}
165
-
166
-	// track how many of every type we've seen (probably unnecessary)
167
-	if level.Level == ErrorLevel.Level {
168
-		l.errorsSeen += 1
169
-	} else if level.Level == WarnLevel.Level {
170
-		l.warningsSeen += 1
171
-	}
172
-
173
-	if level.Level < l.level.Level {
174
-		return
175
-	}
176
-
177
-	if len(message.Template) == 0 {
178
-		l.Write(LogEntry{level, message})
164
+	if entry.Level.Level < l.level.Level { // logging level says skip this entry
179 165
 		return
180 166
 	}
181 167
 
182
-	// if given a template, convert it to text
183
-	parsedTmpl, err := template.New(message.ID).Parse(message.Template)
184
-	if err != nil {
185
-		templateErrorMessage := Message{
186
-			ID: "templateParseErr",
187
-			TemplateData: map[string]interface{}{
188
-				"error":           err.Error(),
189
-				"originalMessage": message,
190
-			},
168
+	if msg := &entry.Message; msg.EvaluatedText == "" && msg.Template != "" {
169
+		// if given a template instead of text, convert it to text
170
+		parsedTmpl, err := template.New(msg.ID).Parse(msg.Template)
171
+		if err != nil {
172
+			entry.Message = Message{
173
+				ID: "templateParseErr",
174
+				TemplateData: Hash{
175
+					"error":           err.Error(),
176
+					"originalMessage": msg,
177
+				},
178
+				EvaluatedText: fmt.Sprintf("Error parsing template for %s:\n%s=== Error was:\n%v\nOriginal message:\n%#v", msg.ID, msg.Template, err, msg),
179
+			}
180
+			entry.ID = entry.Message.ID
181
+			l.Write(entry)
182
+			return
191 183
 		}
192
-		l.LogMessage(level, templateErrorMessage)
193
-		return
194
-	}
195 184
 
196
-	var buff bytes.Buffer
197
-	err = parsedTmpl.Execute(&buff, message.TemplateData)
198
-	if err != nil {
199
-		templateErrorMessage := Message{
200
-			ID: "templateParseErr",
201
-			TemplateData: map[string]interface{}{
202
-				"error":           err.Error(),
203
-				"originalMessage": message,
204
-			},
185
+		var buff bytes.Buffer
186
+		err = parsedTmpl.Execute(&buff, msg.TemplateData)
187
+		if err != nil {
188
+			entry.Message = Message{
189
+				ID: "templateExecErr",
190
+				TemplateData: Hash{
191
+					"error":           err.Error(),
192
+					"originalMessage": msg,
193
+				},
194
+				EvaluatedText: fmt.Sprintf("Error executing template for %s:\n%s=== Error was:\n%v\nOriginal message:\n%#v", msg.ID, msg.Template, err, msg),
195
+			}
196
+			entry.ID = entry.Message.ID
197
+			l.Write(entry)
198
+			return
205 199
 		}
206
-		l.LogMessage(level, templateErrorMessage)
207
-		return
208 200
 
201
+		msg.EvaluatedText = buff.String()
209 202
 	}
210 203
 
211
-	message.EvaluatedText = buff.String()
212
-	l.Write(LogEntry{level, message})
204
+	l.Write(entry)
213 205
 }
214 206
 
215 207
 // Convenience functions
216 208
 func (l *Logger) Error(id string, text string) {
217
-	l.Logp(ErrorLevel, id, text)
209
+	l.logp(ErrorLevel, id, text)
218 210
 }
219 211
 func (l *Logger) Errorf(id string, msg string, a ...interface{}) {
220
-	l.Logpf(ErrorLevel, id, msg, a...)
212
+	l.logf(ErrorLevel, id, msg, a...)
221 213
 }
222
-func (l *Logger) Errorm(message Message) {
223
-	l.LogMessage(ErrorLevel, message)
214
+func (l *Logger) Errort(id string, template string, data interface{}) {
215
+	l.logt(ErrorLevel, id, template, data)
224 216
 }
225 217
 func (l *Logger) Warn(id string, text string) {
226
-	l.Logp(WarnLevel, id, text)
218
+	l.logp(WarnLevel, id, text)
227 219
 }
228 220
 func (l *Logger) Warnf(id string, msg string, a ...interface{}) {
229
-	l.Logpf(WarnLevel, id, msg, a...)
230
-}
231
-func (l *Logger) Warnm(message Message) {
232
-	l.LogMessage(WarnLevel, message)
221
+	l.logf(WarnLevel, id, msg, a...)
233 222
 }
234 223
 func (l *Logger) Info(id string, text string) {
235
-	l.Logp(InfoLevel, id, text)
224
+	l.logp(InfoLevel, id, text)
236 225
 }
237 226
 func (l *Logger) Infof(id string, msg string, a ...interface{}) {
238
-	l.Logpf(InfoLevel, id, msg, a...)
239
-}
240
-func (l *Logger) Infom(message Message) {
241
-	l.LogMessage(InfoLevel, message)
227
+	l.logf(InfoLevel, id, msg, a...)
242 228
 }
243 229
 func (l *Logger) Notice(id string, text string) {
244
-	l.Logp(NoticeLevel, id, text)
230
+	l.logp(NoticeLevel, id, text)
245 231
 }
246 232
 func (l *Logger) Noticef(id string, msg string, a ...interface{}) {
247
-	l.Logpf(NoticeLevel, id, msg, a...)
233
+	l.logf(NoticeLevel, id, msg, a...)
248 234
 }
249
-func (l *Logger) Noticem(message Message) {
250
-	l.LogMessage(NoticeLevel, message)
235
+func (l *Logger) Noticet(id string, template string, data interface{}) {
236
+	l.logt(NoticeLevel, id, template, data)
251 237
 }
252 238
 func (l *Logger) Debug(id string, text string) {
253
-	l.Logp(DebugLevel, id, text)
239
+	l.logp(DebugLevel, id, text)
254 240
 }
255 241
 func (l *Logger) Debugf(id string, msg string, a ...interface{}) {
256
-	l.Logpf(DebugLevel, id, msg, a...)
257
-}
258
-func (l *Logger) Debugm(message Message) {
259
-	l.LogMessage(DebugLevel, message)
242
+	l.logf(DebugLevel, id, msg, a...)
260 243
 }
261 244
 
262
-func (l *Logger) Logp(level Level, id string, text string) {
263
-	l.LogMessage(level, Message{ID: id, EvaluatedText: text})
245
+func origin(skip int) string {
246
+	if _, file, _, ok := runtime.Caller(skip + 1); ok {
247
+		paths := strings.SplitAfter(file, "github.com/")
248
+		return "controller " + paths[len(paths)-1]
249
+	} else {
250
+		return "unknown"
251
+	}
252
+}
253
+func (l *Logger) logp(level Level, id string, text string) {
254
+	l.LogEntry(Entry{id, origin(1), level, Message{ID: id, EvaluatedText: text}})
264 255
 }
265
-func (l *Logger) Logpf(level Level, id string, msg string, a ...interface{}) {
266
-	l.Logp(level, id, fmt.Sprintf(msg, a...))
256
+func (l *Logger) logf(level Level, id string, msg string, a ...interface{}) {
257
+	l.LogEntry(Entry{id, origin(1), level, Message{ID: id, EvaluatedText: fmt.Sprintf(msg, a...)}})
258
+}
259
+func (l *Logger) logt(level Level, id string, template string, data interface{}) {
260
+	l.LogEntry(Entry{id, origin(1), level, Message{ID: id, Template: template, TemplateData: data}})
267 261
 }
268 262
 
269 263
 func (l *Logger) Finish() {
270 264
 	l.loggerType.Finish()
271 265
 }
272 266
 
273
-func (l *Logger) ErrorsSeen() bool {
274
-	return l.errorsSeen > 0
275
-}
276
-
277 267
 // turn excess lines into [...]
278 268
 func LimitLines(msg string, n int) string {
279 269
 	lines := strings.SplitN(msg, "\n", n+1)
... ...
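After the refactor each level keeps up to three entry points: plain text, Printf-style ("f" suffix), and template-style ("t" suffix, defined only for the levels that currently use templates), and each stamps the entry with the caller's location via origin(). A usage sketch (IDs illustrative):

    logger.Notice("diagPlain", "pre-evaluated text")
    logger.Noticef("diagFmt", "formatted: %d warnings", 2)
    logger.Noticet("diagTmpl", "templated: {{.warnings}} warnings", log.Hash{"warnings": 2})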
@@ -31,11 +31,14 @@ func IsTerminal(w io.Writer) bool {
31 31
 	return ok && term.IsTerminal(file.Fd())
32 32
 }
33 33
 
34
-func (t *textLogger) Write(entry LogEntry) {
34
+func (t *textLogger) Write(entry Entry) {
35 35
 	if t.ttyOutput {
36 36
 		ct.ChangeColor(entry.Level.Color, entry.Level.Bright, ct.None, false)
37 37
 	}
38
-	text := strings.TrimSpace(entry.EvaluatedText)
38
+	text := strings.TrimSpace(entry.Message.EvaluatedText)
39
+	if entry.Level.Level >= WarnLevel.Level {
40
+		text = fmt.Sprintf("[ID \"%s\" from %s]\n", entry.ID, entry.Origin) + text
41
+	}
39 42
 	if strings.Contains(text, "\n") { // separate multiline comments with newlines
40 43
 		if !t.lastNewline {
41 44
 			fmt.Fprintln(t.out) // separate from previous one-line log msg
... ...
@@ -11,7 +11,7 @@ type yamlLogger struct {
11 11
 	logStarted bool
12 12
 }
13 13
 
14
-func (y *yamlLogger) Write(entry LogEntry) {
14
+func (y *yamlLogger) Write(entry Entry) {
15 15
 	b, _ := yaml.Marshal(&entry)
16 16
 	fmt.Fprintln(y.out, "---\n"+string(b))
17 17
 }
18 18
deleted file mode 100644
... ...
@@ -1,48 +0,0 @@
1
-package master
2
-
3
-import (
4
-	"errors"
5
-
6
-	configapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
7
-	configvalidation "github.com/openshift/origin/pkg/cmd/server/api/validation"
8
-	"github.com/openshift/origin/pkg/diagnostics/log"
9
-)
10
-
11
-// MasterConfigCheck
12
-type MasterConfigCheck struct {
13
-	MasterConfigFile string
14
-
15
-	Log *log.Logger
16
-}
17
-
18
-func (d MasterConfigCheck) Description() string {
19
-	return "Check the master config file"
20
-}
21
-func (d MasterConfigCheck) CanRun() (bool, error) {
22
-	if len(d.MasterConfigFile) == 0 {
23
-		return false, errors.New("must have master config file")
24
-	}
25
-
26
-	return true, nil
27
-}
28
-func (d MasterConfigCheck) Check() (bool, []log.Message, []error, []error) {
29
-	if _, err := d.CanRun(); err != nil {
30
-		return false, nil, nil, []error{err}
31
-	}
32
-
33
-	d.Log.Debugf("discMCfile", "Looking for master config file at '%s'", d.MasterConfigFile)
34
-	masterConfig, err := configapilatest.ReadAndResolveMasterConfig(d.MasterConfigFile)
35
-	if err != nil {
36
-		d.Log.Errorf("discMCfail", "Could not read master config file '%s':\n(%T) %[2]v", d.MasterConfigFile, err)
37
-
38
-		return false, nil, nil, []error{err}
39
-	}
40
-
41
-	d.Log.Infof("discMCfound", "Found a master config file:\n%[1]s", d.MasterConfigFile)
42
-
43
-	if validationResults := configvalidation.ValidateMasterConfig(masterConfig); len(validationResults.Errors) > 0 {
44
-		return false, nil, nil, validationResults.Errors
45
-	}
46
-
47
-	return true, nil, nil, nil
48
-}
49 1
deleted file mode 100644
... ...
@@ -1,48 +0,0 @@
1
-package node
2
-
3
-import (
4
-	"errors"
5
-
6
-	configapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
7
-	configvalidation "github.com/openshift/origin/pkg/cmd/server/api/validation"
8
-	"github.com/openshift/origin/pkg/diagnostics/log"
9
-)
10
-
11
-// NodeConfigCheck
12
-type NodeConfigCheck struct {
13
-	NodeConfigFile string
14
-
15
-	Log *log.Logger
16
-}
17
-
18
-func (d NodeConfigCheck) Description() string {
19
-	return "Check the node config file"
20
-}
21
-func (d NodeConfigCheck) CanRun() (bool, error) {
22
-	if len(d.NodeConfigFile) == 0 {
23
-		return false, errors.New("must have node config file")
24
-	}
25
-
26
-	return true, nil
27
-}
28
-func (d NodeConfigCheck) Check() (bool, []log.Message, []error, []error) {
29
-	if _, err := d.CanRun(); err != nil {
30
-		return false, nil, nil, []error{err}
31
-	}
32
-
33
-	d.Log.Debugf("discNCfile", "Looking for node config file at '%s'", d.NodeConfigFile)
34
-	nodeConfig, err := configapilatest.ReadAndResolveNodeConfig(d.NodeConfigFile)
35
-	if err != nil {
36
-		d.Log.Errorf("discNCfail", "Could not read node config file '%s':\n(%T) %[2]v", d.NodeConfigFile, err)
37
-
38
-		return false, nil, nil, []error{err}
39
-	}
40
-
41
-	d.Log.Infof("discNCfound", "Found a node config file:\n%[1]s", d.NodeConfigFile)
42
-
43
-	if validationErrors := configvalidation.ValidateNodeConfig(nodeConfig); len(validationErrors) > 0 {
44
-		return false, nil, nil, validationErrors
45
-	}
46
-
47
-	return true, nil, nil, nil
48
-}
... ...
@@ -3,38 +3,44 @@ package systemd
3 3
 import (
4 4
 	"bufio"
5 5
 	"encoding/json"
6
-	"fmt"
7 6
 	"io"
8 7
 	"os/exec"
8
+	"strconv"
9
+	"time"
9 10
 
10 11
 	"github.com/openshift/origin/pkg/diagnostics/log"
11 12
 	"github.com/openshift/origin/pkg/diagnostics/types"
12
-	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
13
+)
14
+
15
+const (
16
+	sdLogReadErr = `Diagnostics failed to query journalctl for the '%s' unit logs.
17
+This should be very unusual, so please report this error:
18
+%s`
13 19
 )
14 20
 
15 21
 // AnalyzeLogs
16 22
 type AnalyzeLogs struct {
17 23
 	SystemdUnits map[string]types.SystemdUnit
24
+}
18 25
 
19
-	Log *log.Logger
26
+func (d AnalyzeLogs) Name() string {
27
+	return "AnalyzeLogs"
20 28
 }
21 29
 
22 30
 func (d AnalyzeLogs) Description() string {
23
-	return "Check for problems in systemd service logs since each service last started"
31
+	return "Check for recent problems in systemd service logs"
24 32
 }
33
+
25 34
 func (d AnalyzeLogs) CanRun() (bool, error) {
26 35
 	return true, nil
27 36
 }
28
-func (d AnalyzeLogs) Check() (bool, []log.Message, []error, []error) {
29
-	infos := []log.Message{}
30
-	warnings := []error{}
31
-	errors := []error{}
37
+
38
+func (d AnalyzeLogs) Check() *types.DiagnosticResult {
39
+	r := types.NewDiagnosticResult("AnalyzeLogs")
32 40
 
33 41
 	for _, unit := range unitLogSpecs {
34 42
 		if svc := d.SystemdUnits[unit.Name]; svc.Enabled || svc.Active {
35
-			checkMessage := log.Message{ID: "sdCheckLogs", EvaluatedText: fmt.Sprintf("Checking journalctl logs for '%s' service", unit.Name)}
36
-			d.Log.LogMessage(log.InfoLevel, checkMessage)
37
-			infos = append(infos, checkMessage)
43
+			r.Infof("sdCheckLogs", "Checking journalctl logs for '%s' service", unit.Name)
38 44
 
39 45
 			cmd := exec.Command("journalctl", "-ru", unit.Name, "--output=json")
40 46
 			// JSON comes out of journalctl one line per record
... ...
@@ -50,60 +56,54 @@ func (d AnalyzeLogs) Check() (bool, []log.Message, []error, []error) {
50 50
 			}(cmd)
51 51
 
52 52
 			if err != nil {
53
-				diagnosticError := diagnostic.NewDiagnosticError("sdLogReadErr", fmt.Sprintf(sdLogReadErr, unit.Name, errStr(err)), err)
54
-				d.Log.Error(diagnosticError.ID, diagnosticError.Explanation)
55
-				errors = append(errors, diagnosticError)
56
-
57
-				return false, infos, warnings, errors
53
+				r.Errorf("sdLogReadErr", err, sdLogReadErr, unit.Name, errStr(err))
54
+				return r
58 55
 			}
59 56
 			defer func() { // close out pipe once done reading
60 57
 				reader.Close()
61 58
 				cmd.Wait()
62 59
 			}()
63
-			entryTemplate := logEntry{Message: `json:"MESSAGE"`}
60
+			timeLimit := time.Now().Add(-time.Hour)                     // entries older than an hour are probably not relevant
64 61
 			matchCopy := append([]logMatcher(nil), unit.LogMatchers...) // make a copy, will remove matchers after they match something
65
-			for lineReader.Scan() {                                     // each log entry is a line
62
+			lineCount := 0                                              // journalctl emits one log entry per line
63
+			for lineReader.Scan() {
64
+				lineCount += 1
66 65
 				if len(matchCopy) == 0 { // if no rules remain to match
67 66
 					break // don't waste time reading more log entries
68 67
 				}
69
-				bytes, entry := lineReader.Bytes(), entryTemplate
68
+				bytes, entry := lineReader.Bytes(), logEntry{}
70 69
 				if err := json.Unmarshal(bytes, &entry); err != nil {
71
-					badJSONMessage := log.Message{ID: "sdLogBadJSON", EvaluatedText: fmt.Sprintf("Couldn't read the JSON for this log message:\n%s\nGot error %s", string(bytes), errStr(err))}
72
-					d.Log.LogMessage(log.DebugLevel, badJSONMessage)
73
-
70
+					r.Debugf("sdLogBadJSON", "Couldn't read the JSON for this log message:\n%s\nGot error %s", string(bytes), errStr(err))
74 71
 				} else {
72
+					if lineCount > 500 && stampTooOld(entry.TimeStamp, timeLimit) {
73
+						r.Debugf("sdLogTrunc", "Stopped reading %s log: timestamp %s too old", unit.Name, entry.TimeStamp)
74
+						break // if we've analyzed at least 500 entries, stop when age limit reached (don't scan days of logs)
75
+					}
75 76
 					if unit.StartMatch.MatchString(entry.Message) {
76
-						break // saw the log message where the unit started; done looking.
77
+						break // saw log message for unit startup; don't analyze previous logs
77 78
 					}
78 79
 					for index, match := range matchCopy { // match log message against provided matchers
79 80
 						if strings := match.Regexp.FindStringSubmatch(entry.Message); strings != nil {
80 81
 							// if matches: print interpretation, remove from matchCopy, and go on to next log entry
81
-							keep := match.KeepAfterMatch
82
-							if match.Interpret != nil {
83
-								currKeep, currInfos, currWarnings, currErrors := match.Interpret(d.Log, &entry, strings)
82
+							keep := match.KeepAfterMatch // generic keep logic
83
+							if match.Interpret != nil {  // apply custom match logic
84
+								currKeep, result := match.Interpret(&entry, strings)
84 85
 								keep = currKeep
85
-								infos = append(infos, currInfos...)
86
-								warnings = append(warnings, currWarnings...)
87
-								errors = append(errors, currErrors...)
88
-
89
-							} else {
90
-								text := fmt.Sprintf("Found '%s' journald log message:\n  %s\n", unit.Name, entry.Message) + match.Interpretation
91
-								message := log.Message{ID: match.Id, EvaluatedText: text, TemplateData: map[string]string{"unit": unit.Name, "logMsg": entry.Message}}
92
-								d.Log.LogMessage(match.Level, message)
93
-								diagnosticError := diagnostic.NewDiagnosticError(match.Id, text, nil)
86
+								r.Append(result)
87
+							} else { // apply generic match processing
88
+								template := "Found '{{.unit}}' journald log message:\n  {{.logMsg}}\n{{.interpretation}}"
89
+								templateData := log.Hash{"unit": unit.Name, "logMsg": entry.Message, "interpretation": match.Interpretation}
94 90
 
95 91
 								switch match.Level {
96
-								case log.InfoLevel, log.NoticeLevel:
97
-									infos = append(infos, message)
98
-
92
+								case log.DebugLevel:
93
+									r.Debugt(match.Id, template, templateData)
94
+								case log.InfoLevel:
95
+									r.Infot(match.Id, template, templateData)
99 96
 								case log.WarnLevel:
100
-									warnings = append(warnings, diagnosticError)
101
-
97
+									r.Warnt(match.Id, nil, template, templateData)
102 98
 								case log.ErrorLevel:
103
-									errors = append(errors, diagnosticError)
104
-
99
+									r.Errort(match.Id, nil, template, templateData)
105 100
 								}
106
-
107 101
 							}
108 102
 
109 103
 							if !keep { // remove matcher once seen
... ...
@@ -118,11 +118,12 @@ func (d AnalyzeLogs) Check() (bool, []log.Message, []error, []error) {
118 118
 		}
119 119
 	}
120 120
 
121
-	return (len(errors) == 0), infos, warnings, errors
121
+	return r
122 122
 }
123 123
 
124
-const (
125
-	sdLogReadErr = `Diagnostics failed to query journalctl for the '%s' unit logs.
126
-This should be very unusual, so please report this error:
127
-%s`
128
-)
124
+func stampTooOld(stamp string, timeLimit time.Time) bool {
125
+	if epochus, err := strconv.ParseInt(stamp, 10, 64); err == nil {
126
+		return time.Unix(epochus/1000000, 0).Before(timeLimit)
127
+	}
128
+	return true // something went wrong, stop looking...
129
+}
... ...
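journald reports __REALTIME_TIMESTAMP in microseconds since the epoch, hence the division by 1e6 before time.Unix. A quick in-package sanity check of the cutoff logic (values synthesized for illustration):

    limit := time.Now().Add(-time.Hour)
    stamp := strconv.FormatInt(time.Now().Add(-2*time.Hour).UnixNano()/1000, 10) // microseconds, two hours old
    fmt.Println(stampTooOld(stamp, limit)) // true: older than the one-hour window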
@@ -12,7 +12,7 @@ import (
12 12
 func GetSystemdUnits(logger *log.Logger) map[string]types.SystemdUnit {
13 13
 	systemdUnits := map[string]types.SystemdUnit{}
14 14
 
15
-	logger.Notice("discBegin", "Beginning systemd discovery")
15
+	logger.Notice("discBeginSysd", "Performing systemd discovery")
16 16
 	for _, name := range []string{"openshift", "openshift-master", "openshift-node", "openshift-sdn-master", "openshift-sdn-node", "docker", "openvswitch", "iptables", "etcd", "kubernetes"} {
17 17
 		systemdUnits[name] = discoverSystemdUnit(logger, name)
18 18
 
... ...
@@ -5,11 +5,12 @@ import (
5 5
 
6 6
 	"fmt"
7 7
 	"github.com/openshift/origin/pkg/diagnostics/log"
8
-	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
8
+	"github.com/openshift/origin/pkg/diagnostics/types"
9 9
 )
10 10
 
11 11
 type logEntry struct {
12
-	Message string // I feel certain we will want more fields at some point
12
+	Message   string `json:"MESSAGE"`
13
+	TimeStamp string `json:"__REALTIME_TIMESTAMP"` // microseconds since the epoch
13 14
 }
14 15
 
15 16
 type logMatcher struct { // regex for scanning log messages and interpreting them when found
... ...
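journalctl --output=json emits one JSON object per line; only the two fields tagged above are decoded, and any other fields in the record are ignored. For example, from inside this package (record contents illustrative):

    line := []byte(`{"MESSAGE":"Starting an OpenShift node","__REALTIME_TIMESTAMP":"1437409508000000","_SYSTEMD_UNIT":"openshift-node.service"}`)
    var entry logEntry
    if err := json.Unmarshal(line, &entry); err == nil {
    	fmt.Println(entry.Message, entry.TimeStamp) // Starting an OpenShift node 1437409508000000
    }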
@@ -19,10 +20,9 @@ type logMatcher struct { // regex for scanning log messages and interpreting the
19 19
 	Interpretation string // log with above level+id if it's simple
20 20
 	KeepAfterMatch bool   // usually note only first matched entry, ignore rest
21 21
 	Interpret      func(  // run this for custom logic on match
22
-		logger *log.Logger,
23 22
 		entry *logEntry,
24 23
 		matches []string,
25
-	) (bool, []log.Message, []error, []error) // KeepAfterMatch?
24
+	) (bool /* KeepAfterMatch? */, *types.DiagnosticResult)
26 25
 }
27 26
 
28 27
 type unitSpec struct {
... ...
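Under the new signature a custom Interpret builds and returns its own DiagnosticResult instead of writing to a shared logger, and the boolean return decides whether the matcher stays active. A minimal in-package sketch (regex, ID, and message are illustrative, not from this commit):

    var exampleMatcher = logMatcher{
    	Regexp: regexp.MustCompile(`level=error msg="(.*)"`),
    	Level:  log.WarnLevel,
    	Interpret: func(entry *logEntry, matches []string) (bool, *types.DiagnosticResult) {
    		r := types.NewDiagnosticResult("example.journald")
    		r.Warnf("sdLogExample", nil, "daemon reported an error: %s", matches[1])
    		return false, r // false: remove this matcher after its first hit
    	},
    }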
@@ -32,8 +32,8 @@ type unitSpec struct {
32 32
 }
33 33
 
34 34
 //
35
-// -------- Things that feed into the diagnostics definitions -----------
36
-// Search for Diagnostics for the actual diagnostics.
35
+// -------- These are things that feed into the diagnostics definitions -----------
36
+//
37 37
 
38 38
 // Reusable log matchers:
39 39
 var badImageTemplate = logMatcher{
... ...
@@ -81,15 +81,15 @@ logs after the node is actually available.`,
81 81
 				// TODO: don't rely on ipv4 format, should be ipv6 "soon"
82 82
 				Regexp: regexp.MustCompile("http: TLS handshake error from ([\\d.]+):\\d+: remote error: bad certificate"),
83 83
 				Level:  log.WarnLevel,
84
-				Interpret: func(logger *log.Logger, entry *logEntry, matches []string) (bool, []log.Message, []error, []error) {
85
-					warnings := []error{}
84
+				Interpret: func(entry *logEntry, matches []string) (bool, *types.DiagnosticResult) {
85
+					r := types.NewDiagnosticResult("openshift-master.journald")
86 86
 
87 87
 					client := matches[1]
88 88
 					prelude := fmt.Sprintf("Found 'openshift-master' journald log message:\n  %s\n", entry.Message)
89 89
 					if tlsClientErrorSeen == nil { // first time this message was seen
90 90
 						tlsClientErrorSeen = map[string]bool{client: true}
91 91
 						// TODO: too generic, adjust message depending on subnet of the "from" address
92
-						diagnosticError := diagnostic.NewDiagnosticError("sdLogOMreBadCert", prelude+`
92
+						r.Warn("sdLogOMreBadCert", nil, prelude+`
93 93
 This error indicates that a client attempted to connect to the master
94 94
 HTTPS API server but broke off the connection because the master's
95 95
 certificate is not validated by a cerificate authority (CA) acceptable
... ...
@@ -127,21 +127,13 @@ log message:
127 127
   (so this message may simply indicate that the master generated a new
128 128
   server certificate, e.g. to add a different --public-master, and a
129 129
   browser hasn't accepted it yet and is still attempting API calls;
130
-  try logging out of the console and back in again).`, nil)
131
-
132
-						message := log.Message{ID: diagnosticError.ID, EvaluatedText: diagnosticError.Explanation, TemplateData: map[string]string{"client": client}}
133
-						logger.LogMessage(log.WarnLevel, message)
134
-						warnings = append(warnings, diagnosticError)
130
+  try logging out of the console and back in again).`)
135 131
 
136 132
 					} else if !tlsClientErrorSeen[client] {
137 133
 						tlsClientErrorSeen[client] = true
138
-						diagnosticError := diagnostic.NewDiagnosticError("sdLogOMreBadCert", prelude+`This message was diagnosed above, but for a different client address.`, nil)
139
-						message := log.Message{ID: diagnosticError.ID, EvaluatedText: diagnosticError.Explanation, TemplateData: map[string]string{"client": client}}
140
-						logger.LogMessage(log.WarnLevel, message)
141
-						warnings = append(warnings, diagnosticError)
142
-
134
+						r.Warn("sdLogOMreBadCert", nil, prelude+`This message was diagnosed above, but for a different client address.`)
143 135
 					} // else, it's a repeat, don't mention it
144
-					return true, nil, warnings, nil // show once for every client failing to connect, not just the first
136
+					return true /* show once for every client failing to connect, not just the first */, r
145 137
 				},
146 138
 			},
147 139
 			{
... ...
@@ -167,11 +159,6 @@ message for any node with this problem.
167 167
 		},
168 168
 	},
169 169
 	{
170
-		Name:        "openshift-sdn-master",
171
-		StartMatch:  regexp.MustCompile("Starting OpenShift SDN Master"),
172
-		LogMatchers: []logMatcher{},
173
-	},
174
-	{
175 170
 		Name:       "openshift-node",
176 171
 		StartMatch: regexp.MustCompile("Starting an OpenShift node"),
177 172
 		LogMatchers: []logMatcher{
... ...
@@ -236,25 +223,19 @@ to the .kubeconfig specified in /etc/sysconfig/openshift-node
236 236
 This host will not function as a node until this is resolved. Pods
237 237
 scheduled for this node will remain in pending or unknown state forever.`,
238 238
 			},
239
-		},
240
-	},
241
-	{
242
-		Name:       "openshift-sdn-node",
243
-		StartMatch: regexp.MustCompile("Starting OpenShift SDN node"),
244
-		LogMatchers: []logMatcher{
245 239
 			{
246 240
 				Regexp: regexp.MustCompile("Could not find an allocated subnet for this minion.*Waiting.."),
247 241
 				Level:  log.WarnLevel,
248 242
 				Id:     "sdLogOSNnoSubnet",
249 243
 				Interpretation: `
250
-This warning occurs when openshift-sdn-node is trying to request the
244
+This warning occurs when openshift-node is trying to request the
251 245
 SDN subnet it should be configured with according to openshift-sdn-master,
252 246
 but either can't connect to it ("All the given peers are not reachable")
253 247
 or has not yet been assigned a subnet ("Key not found").
254 248
 
255 249
 This can just be a matter of waiting for the master to become fully
256 250
 available and define a record for the node (aka "minion") to use,
257
-and openshift-sdn-node will wait until that occurs, so the presence
251
+and openshift-node will wait until that occurs, so the presence
258 252
 of this message in the node log isn't necessarily a problem as
259 253
 long as the SDN is actually working, but this message may help indicate
260 254
 the problem if it is not working.
... ...
@@ -262,8 +243,8 @@ the problem if it is not working.
262 262
 If the master is available and this node's record is defined and this
263 263
 message persists, then it may be a sign of a different misconfiguration.
264 264
 Unfortunately the message is not specific about why the connection failed.
265
-Check MASTER_URL in /etc/sysconfig/openshift-sdn-node:
266
- * Is the protocol https? It should be http.
265
+Check the master's URL in the node configuration.
266
+ * Is the protocol http? It should be https.
267 267
  * Can you reach the address and port from the node using curl?
268 268
    ("404 page not found" is correct response)`,
269 269
 			},
... ...
@@ -8,14 +8,15 @@ import (
8 8
 
9 9
 	"github.com/openshift/origin/pkg/diagnostics/log"
10 10
 	"github.com/openshift/origin/pkg/diagnostics/types"
11
-	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
12 11
 )
13 12
 
14 13
 // UnitStatus
15 14
 type UnitStatus struct {
16 15
 	SystemdUnits map[string]types.SystemdUnit
16
+}
17 17
 
18
-	Log *log.Logger
18
+func (d UnitStatus) Name() string {
19
+	return "UnitStatus"
19 20
 }
20 21
 
21 22
 func (d UnitStatus) Description() string {
... ...
@@ -30,67 +31,33 @@ func (d UnitStatus) CanRun() (bool, error) {
30 30
 
31 31
 	return false, errors.New("systemd is not present on this host")
32 32
 }
33
-func (d UnitStatus) Check() (bool, []log.Message, []error, []error) {
34
-	if _, err := d.CanRun(); err != nil {
35
-		return false, nil, nil, []error{err}
36
-	}
37
-
38
-	warnings := []error{}
39
-	errors := []error{}
40
-
41
-	unitWarnings, unitErrors := unitRequiresUnit(d.Log, d.SystemdUnits["openshift-node"], d.SystemdUnits["iptables"], nodeRequiresIPTables)
42
-	warnings = append(warnings, unitWarnings...)
43
-	errors = append(errors, unitErrors...)
33
+func (d UnitStatus) Check() *types.DiagnosticResult {
34
+	r := types.NewDiagnosticResult("UnitStatus")
44 35
 
45
-	unitWarnings, unitErrors = unitRequiresUnit(d.Log, d.SystemdUnits["openshift-node"], d.SystemdUnits["docker"], `OpenShift nodes use Docker to run containers.`)
46
-	warnings = append(warnings, unitWarnings...)
47
-	errors = append(errors, unitErrors...)
48
-
49
-	unitWarnings, unitErrors = unitRequiresUnit(d.Log, d.SystemdUnits["openshift"], d.SystemdUnits["docker"], `OpenShift nodes use Docker to run containers.`)
50
-	warnings = append(warnings, unitWarnings...)
51
-	errors = append(errors, unitErrors...)
52
-
53
-	// node's dependency on openvswitch is a special case.
54
-	// We do not need to enable ovs because openshift-node starts it for us.
55
-	if d.SystemdUnits["openshift-node"].Active && !d.SystemdUnits["openvswitch"].Active {
56
-		diagnosticError := diagnostic.NewDiagnosticError("sdUnitSDNreqOVS", sdUnitSDNreqOVS, nil)
57
-		d.Log.Error(diagnosticError.ID, diagnosticError.Explanation)
58
-		errors = append(errors, diagnosticError)
59
-	}
36
+	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["iptables"], nodeRequiresIPTables)
37
+	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["docker"], `OpenShift nodes use Docker to run containers.`)
38
+	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["openvswitch"], sdUnitSDNreqOVS)
39
+	unitRequiresUnit(r, d.SystemdUnits["openshift-master"], d.SystemdUnits["openvswitch"], `OpenShift masters use openvswitch for access to cluster SDN networking`)
40
+	// all-in-one networking *could* be simpler, so fewer checks
41
+	unitRequiresUnit(r, d.SystemdUnits["openshift"], d.SystemdUnits["docker"], `OpenShift nodes use Docker to run containers.`)
60 42
 
61 43
 	// Anything that is enabled but not running deserves notice
62 44
 	for name, unit := range d.SystemdUnits {
63 45
 		if unit.Enabled && !unit.Active {
64
-			diagnosticError := diagnostic.NewDiagnosticErrorFromTemplate("sdUnitInactive", sdUnitInactive, map[string]string{"unit": name})
65
-			d.Log.LogMessage(log.ErrorLevel, *diagnosticError.LogMessage)
66
-			errors = append(errors, diagnosticError)
46
+			r.Errort("sdUnitInactive", nil, sdUnitInactive, log.Hash{"unit": name})
67 47
 		}
68 48
 	}
69
-
70
-	return (len(errors) == 0), nil, warnings, errors
49
+	return r
71 50
 }
72 51
 
73
-func unitRequiresUnit(logger *log.Logger, unit types.SystemdUnit, requires types.SystemdUnit, reason string) ([]error, []error) {
74
-	templateData := map[string]string{"unit": unit.Name, "required": requires.Name, "reason": reason}
52
+func unitRequiresUnit(r *types.DiagnosticResult, unit types.SystemdUnit, requires types.SystemdUnit, reason string) {
53
+	templateData := log.Hash{"unit": unit.Name, "required": requires.Name, "reason": reason}
75 54
 
76 55
 	if (unit.Active || unit.Enabled) && !requires.Exists {
77
-		diagnosticError := diagnostic.NewDiagnosticErrorFromTemplate("sdUnitReqLoaded", sdUnitReqLoaded, templateData)
78
-		logger.LogMessage(log.ErrorLevel, *diagnosticError.LogMessage)
79
-		return nil, []error{diagnosticError}
80
-
56
+		r.Errort("sdUnitReqLoaded", nil, sdUnitReqLoaded, templateData)
81 57
 	} else if unit.Active && !requires.Active {
82
-		diagnosticError := diagnostic.NewDiagnosticErrorFromTemplate("sdUnitReqActive", sdUnitReqActive, templateData)
83
-		logger.LogMessage(log.ErrorLevel, *diagnosticError.LogMessage)
84
-		return nil, []error{diagnosticError}
85
-
86
-	} else if unit.Enabled && !requires.Enabled {
87
-		diagnosticError := diagnostic.NewDiagnosticErrorFromTemplate("sdUnitReqEnabled", sdUnitReqEnabled, templateData)
88
-		logger.LogMessage(log.WarnLevel, *diagnosticError.LogMessage)
89
-		return []error{diagnosticError}, nil
90
-
58
+		r.Errort("sdUnitReqActive", nil, sdUnitReqActive, templateData)
91 59
 	}
92
-
93
-	return nil, nil
94 60
 }
95 61
 
96 62
 func errStr(err error) string {
... ...
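With Name/Description/CanRun/Check in place, UnitStatus and AnalyzeLogs both satisfy the types.Diagnostic interface, so a host diagnostic builder can assemble them uniformly. A sketch of the wiring (assumes a configured *log.Logger; running and draining results is sketched with the DiagnosticResult type further below):

    units := systemd.GetSystemdUnits(logger)
    diagnostics := []types.Diagnostic{
    	systemd.UnitStatus{SystemdUnits: units},
    	systemd.AnalyzeLogs{SystemdUnits: units},
    }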
@@ -156,12 +123,4 @@ To ensure it is not failing to run, check the status and logs with:
156 156
   # systemctl status {{.required}}
157 157
   # journalctl -ru {{.required}}
158 158
   `
159
-
160
-	sdUnitReqEnabled = `
161
-systemd unit {{.unit}} is enabled to run automatically at boot, but {{.required}} is not.
162
-{{.reason}}
163
-An administrator can enable the {{.required}} unit with:
164
-
165
-  # systemctl enable {{.required}}
166
-  `
167 159
 )
168 160
new file mode 100644
... ...
@@ -0,0 +1,175 @@
0
+package types
1
+
2
+import (
3
+	"fmt"
4
+	"github.com/golang/glog"
5
+	"runtime"
6
+	"strings"
7
+
8
+	"github.com/openshift/origin/pkg/diagnostics/log"
9
+)
10
+
11
+type Diagnostic interface {
12
+	Name() string
13
+	Description() string
14
+	CanRun() (canRun bool, reason error)
15
+	Check() *DiagnosticResult
16
+}
17
+
18
+type DiagnosticResult struct {
19
+	failure  bool
20
+	origin   string // name of diagnostic; automatically inserted into log Entries
21
+	logs     []log.Entry
22
+	warnings []DiagnosticError
23
+	errors   []DiagnosticError
24
+}
25
+
26
+func NewDiagnosticResult(origin string) *DiagnosticResult {
27
+	return &DiagnosticResult{origin: origin}
28
+}
29
+
30
+func (r *DiagnosticResult) Complete() *DiagnosticResult {
31
+	if r.errors == nil {
32
+		r.errors = make([]DiagnosticError, 0)
33
+	}
34
+	if r.warnings == nil {
35
+		r.warnings = make([]DiagnosticError, 0)
36
+	}
37
+	if r.logs == nil {
38
+		r.logs = make([]log.Entry, 0)
39
+	}
40
+	return r
41
+}
42
+
43
+func (r *DiagnosticResult) appendLogs(stackDepth int, entry ...log.Entry) {
44
+	if r.logs == nil {
45
+		r.logs = make([]log.Entry, 0)
46
+	}
47
+	r.logs = append(r.logs, entry...)
48
+	// glog immediately for debugging when a diagnostic silently chokes
49
+	for _, entry := range entry {
50
+		if glog.V(glog.Level(6 - entry.Level.Level)) {
51
+			glog.InfoDepth(stackDepth, entry.Message.String())
52
+		}
53
+	}
54
+}
55
+
56
+func (r *DiagnosticResult) Failure() bool {
57
+	return r.failure
58
+}
59
+
60
+func (r *DiagnosticResult) Logs() []log.Entry {
61
+	if r.logs == nil {
62
+		return make([]log.Entry, 0)
63
+	}
64
+	return r.logs
65
+}
66
+
67
+func (r *DiagnosticResult) appendWarnings(warn ...DiagnosticError) {
68
+	if r.warnings == nil {
69
+		r.warnings = make([]DiagnosticError, 0)
70
+	}
71
+	r.warnings = append(r.warnings, warn...)
72
+}
73
+
74
+func (r *DiagnosticResult) Warnings() []DiagnosticError {
75
+	if r.warnings == nil {
76
+		return make([]DiagnosticError, 0)
77
+	}
78
+	return r.warnings
79
+}
80
+
81
+func (r *DiagnosticResult) appendErrors(err ...DiagnosticError) {
82
+	if r.errors == nil {
83
+		r.errors = make([]DiagnosticError, 0)
84
+	}
85
+	r.failure = true
86
+	r.errors = append(r.errors, err...)
87
+}
88
+
89
+func (r *DiagnosticResult) Errors() []DiagnosticError {
90
+	if r.errors == nil {
91
+		return make([]DiagnosticError, 0)
92
+	}
93
+	return r.errors
94
+}
95
+
96
+func (r *DiagnosticResult) Append(r2 *DiagnosticResult) {
97
+	r.Complete()
98
+	r2.Complete()
99
+	r.logs = append(r.logs, r2.logs...)
100
+	r.warnings = append(r.warnings, r2.warnings...)
101
+	r.errors = append(r.errors, r2.errors...)
102
+	r.failure = r.failure || r2.failure
103
+}
104
+
105
+// basic ingress functions (private)
106
+func (r *DiagnosticResult) caller(depth int) string {
107
+	if _, file, line, ok := runtime.Caller(depth + 1); ok {
108
+		paths := strings.SplitAfter(file, "github.com/")
109
+		return fmt.Sprintf("diagnostic %s@%s:%d", r.origin, paths[len(paths)-1], line)
110
+	}
111
+	return "diagnostic " + r.origin
112
+}
113
+func (r *DiagnosticResult) logError(id string, err error, msg *log.Message) {
114
+	r.appendLogs(2, log.Entry{id, r.caller(2), log.ErrorLevel, *msg})
115
+	if de, ok := err.(DiagnosticError); ok {
116
+		r.appendErrors(de)
117
+	} else {
118
+		r.appendErrors(DiagnosticError{id, msg, err})
119
+	}
120
+}
121
+func (r *DiagnosticResult) logWarning(id string, err error, msg *log.Message) {
122
+	r.appendLogs(2, log.Entry{id, r.caller(2), log.WarnLevel, *msg})
123
+	if de, ok := err.(DiagnosticError); ok {
124
+		r.appendWarnings(de)
125
+	} else {
126
+		r.appendWarnings(DiagnosticError{id, msg, err})
127
+	}
128
+}
129
+func (r *DiagnosticResult) logMessage(id string, level log.Level, msg *log.Message) {
130
+	r.appendLogs(2, log.Entry{id, r.caller(2), level, *msg})
131
+}
132
+
133
+// Public ingress functions
134
+// Errors are recorded as errors and also logged
135
+func (r *DiagnosticResult) Error(id string, err error, text string) {
136
+	r.logError(id, err, &log.Message{id, "", nil, text})
137
+}
138
+func (r *DiagnosticResult) Errorf(id string, err error, format string, a ...interface{}) {
139
+	r.logError(id, err, &log.Message{id, "", nil, fmt.Sprintf(format, a...)})
140
+}
141
+func (r *DiagnosticResult) Errort(id string, err error, template string, data interface{} /* log.Hash */) {
142
+	r.logError(id, err, &log.Message{id, template, data, ""})
143
+}
144
+
145
+// Warnings are recorded as warnings and also logged
146
+func (r *DiagnosticResult) Warn(id string, err error, text string) {
147
+	r.logWarning(id, err, &log.Message{id, "", nil, text})
148
+}
149
+func (r *DiagnosticResult) Warnf(id string, err error, format string, a ...interface{}) {
150
+	r.logWarning(id, err, &log.Message{id, "", nil, fmt.Sprintf(format, a...)})
151
+}
152
+func (r *DiagnosticResult) Warnt(id string, err error, template string, data interface{} /* log.Hash */) {
153
+	r.logWarning(id, err, &log.Message{id, template, data, ""})
154
+}
155
+
156
+// Info/Debug are just logged.
157
+func (r *DiagnosticResult) Info(id string, text string) {
158
+	r.logMessage(id, log.InfoLevel, &log.Message{id, "", nil, text})
159
+}
160
+func (r *DiagnosticResult) Infof(id string, format string, a ...interface{}) {
161
+	r.logMessage(id, log.InfoLevel, &log.Message{id, "", nil, fmt.Sprintf(format, a...)})
162
+}
163
+func (r *DiagnosticResult) Infot(id string, template string, data interface{} /* log.Hash */) {
164
+	r.logMessage(id, log.InfoLevel, &log.Message{id, template, data, ""})
165
+}
166
+func (r *DiagnosticResult) Debug(id string, text string) {
167
+	r.logMessage(id, log.DebugLevel, &log.Message{id, "", nil, text})
168
+}
169
+func (r *DiagnosticResult) Debugf(id string, format string, a ...interface{}) {
170
+	r.logMessage(id, log.DebugLevel, &log.Message{id, "", nil, fmt.Sprintf(format, a...)})
171
+}
172
+func (r *DiagnosticResult) Debugt(id string, template string, data interface{} /* log.Hash */) {
173
+	r.logMessage(id, log.DebugLevel, &log.Message{id, template, data, ""})
174
+}
0 175
deleted file mode 100644
... ...
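With every Check accumulating into its own result, a runner can execute diagnostics independently (eventually in parallel) and drain logs and counts afterward; the new Summary(warnings, errors) signature fits that flow. A minimal sketch (the RunDiagnostics helper is illustrative, not part of this commit):

    func RunDiagnostics(logger *log.Logger, diagnostics []Diagnostic) {
    	warnings, errors := 0, 0
    	for _, d := range diagnostics {
    		if ok, reason := d.CanRun(); !ok {
    			logger.Noticef("diagSkip", "Skipping %s: %v", d.Name(), reason)
    			continue
    		}
    		r := d.Check()
    		for _, entry := range r.Logs() { // replay the diagnostic's entries through the logger
    			logger.LogEntry(entry)
    		}
    		warnings += len(r.Warnings())
    		errors += len(r.Errors())
    	}
    	logger.Summary(warnings, errors)
    }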
@@ -1,55 +0,0 @@
1
-package diagnostic
2
-
3
-// This needed to be separate from other types to avoid import cycle
4
-// diagnostic -> discovery -> types
5
-
6
-import (
7
-	"fmt"
8
-
9
-	"github.com/openshift/origin/pkg/diagnostics/log"
10
-)
11
-
12
-type Diagnostic interface {
13
-	Description() string
14
-	CanRun() (canRun bool, reason error)
15
-	Check() (success bool, info []log.Message, warnings []error, errors []error)
16
-}
17
-
18
-type DiagnosticError struct {
19
-	ID          string
20
-	Explanation string
21
-	Cause       error
22
-
23
-	LogMessage *log.Message
24
-}
25
-
26
-func NewDiagnosticError(id, explanation string, cause error) DiagnosticError {
27
-	return DiagnosticError{id, explanation, cause, nil}
28
-}
29
-
30
-func NewDiagnosticErrorFromTemplate(id, template string, templateData interface{}) DiagnosticError {
31
-	return DiagnosticError{id, "", nil,
32
-		&log.Message{
33
-			ID:           id,
34
-			Template:     template,
35
-			TemplateData: templateData,
36
-		},
37
-	}
38
-}
39
-
40
-func (e DiagnosticError) Error() string {
41
-	if e.Cause != nil {
42
-		return e.Cause.Error()
43
-	}
44
-
45
-	if e.LogMessage != nil {
46
-		return fmt.Sprintf("%v", e.LogMessage)
47
-	}
48
-
49
-	return e.Explanation
50
-}
51
-
52
-func IsDiagnosticError(e error) bool {
53
-	_, ok := e.(DiagnosticError)
54
-	return ok
55
-}
56 1
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+package types
1
+
2
+import (
3
+	"fmt"
4
+
5
+	"github.com/openshift/origin/pkg/diagnostics/log"
6
+)
7
+
8
+type DiagnosticError struct {
9
+	ID         string
10
+	LogMessage *log.Message
11
+	Cause      error
12
+}
13
+
14
+func (e DiagnosticError) Error() string {
15
+	if e.LogMessage != nil {
16
+		return fmt.Sprintf("%v", e.LogMessage)
17
+	}
18
+	if e.Cause != nil {
19
+		return e.Cause.Error()
20
+	}
21
+	return e.ID
22
+}
23
+
24
+func IsDiagnosticError(e error) bool {
25
+	_, ok := e.(DiagnosticError)
26
+	return ok
27
+}
28
+
29
+// is the error a diagnostics error that matches the given ID?
30
+func MatchesDiagError(err error, id string) bool {
31
+	if derr, ok := err.(DiagnosticError); ok && derr.ID == id {
32
+		return true
33
+	}
34
+	return false
35
+}
0 36
deleted file mode 100644
... ...
@@ -1,38 +0,0 @@
1
-package types
2
-
3
-import "fmt"
4
-
5
-type Version struct {
6
-	X, Y, Z int
7
-}
8
-
9
-func (a Version) Eq(b Version) bool {
10
-	return a.X == b.X && a.Y == b.Y && a.Z == b.Z
11
-}
12
-
13
-func (a Version) Gt(b Version) bool {
14
-	if a.X > b.X {
15
-		return true
16
-	}
17
-	if a.X < b.X {
18
-		return false
19
-	} // so, Xs are equal
20
-	if a.Y > b.Y {
21
-		return true
22
-	}
23
-	if a.Y < b.Y {
24
-		return false
25
-	} // so, Ys are equal
26
-	if a.Z > b.Z {
27
-		return true
28
-	}
29
-	return false
30
-}
31
-
32
-func (v Version) GoString() string {
33
-	return fmt.Sprintf("%d.%d.%d", v.X, v.Y, v.Z)
34
-}
35
-
36
-func (v Version) NonZero() bool {
37
-	return !v.Eq(Version{0, 0, 0})
38
-}
... ...
@@ -4392,6 +4392,36 @@ _openshift_ex_build-chain()
4392 4392
     must_have_one_noun=()
4393 4393
 }
4394 4394
 
4395
+_openshift_ex_diagnostics()
4396
+{
4397
+    last_command="openshift_ex_diagnostics"
4398
+    commands=()
4399
+
4400
+    flags=()
4401
+    two_word_flags=()
4402
+    flags_with_completion=()
4403
+    flags_completion=()
4404
+
4405
+    flags+=("--cluster-context=")
4406
+    flags+=("--config=")
4407
+    flags+=("--context=")
4408
+    flags+=("--diaglevel=")
4409
+    two_word_flags+=("-l")
4410
+    flags+=("--diagnostics=")
4411
+    two_word_flags+=("-d")
4412
+    flags+=("--help")
4413
+    flags+=("-h")
4414
+    flags+=("--host")
4415
+    flags+=("--loglevel=")
4416
+    flags+=("--master-config=")
4417
+    flags+=("--node-config=")
4418
+    flags+=("--output=")
4419
+    two_word_flags+=("-o")
4420
+
4421
+    must_have_one_flag=()
4422
+    must_have_one_noun=()
4423
+}
4424
+
4395 4425
 _openshift_ex_options()
4396 4426
 {
4397 4427
     last_command="openshift_ex_options"
... ...
@@ -4416,6 +4446,7 @@ _openshift_ex()
4416 4416
     commands+=("tokens")
4417 4417
     commands+=("ipfailover")
4418 4418
     commands+=("build-chain")
4419
+    commands+=("diagnostics")
4419 4420
     commands+=("options")
4420 4421
 
4421 4422
     flags=()
4422 4423
new file mode 100644
... ...
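The completion above reflects the unified command this refactor introduces; a typical invocation might be "openshift ex diagnostics --diaglevel=1 --diagnostics=AnalyzeLogs,UnitStatus --output=json" (flag values are illustrative).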
@@ -0,0 +1,73 @@
0
+// +build integration,!no-etcd
1
+
2
+package integration
3
+
4
+import (
5
+	"testing"
6
+
7
+	kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
8
+
9
+	clusterdiags "github.com/openshift/origin/pkg/diagnostics/cluster"
10
+	diagtype "github.com/openshift/origin/pkg/diagnostics/types"
11
+	testutil "github.com/openshift/origin/test/util"
12
+)
13
+
14
+func TestDiagNodeConditions(t *testing.T) {
15
+	//masterConfig, clientFile, err := testutil.StartTestAllInOne()
16
+	_, clientFile, err := testutil.StartTestMaster()
17
+	if err != nil {
18
+		t.Fatalf("unexpected error: %v", err)
19
+	}
20
+	client, err := testutil.GetClusterAdminKubeClient(clientFile)
21
+	if err != nil {
22
+		t.Fatalf("unexpected error: %v", err)
23
+	}
24
+
25
+	nodeDiag := clusterdiags.NodeDefinitions{KubeClient: client}
26
+	// First check with no nodes defined; should get an error about that.
27
+	// Check() now returns a result object rather than (ok, logs, warnings, errors).
28
+	if errors := nodeDiag.Check().Errors(); len(errors) != 1 ||
29
+		!diagtype.MatchesDiagError(errors[0], "clNoAvailNodes") {
30
+		t.Errorf("expected 1 error about not having nodes, not: %#v", errors)
31
+	}
32
+
33
+	// Next create a node and leave it in NotReady state. Should get a warning
34
+	// about that, plus the previous error as there are still no nodes available.
35
+	node, err := client.Nodes().Create(&kapi.Node{ObjectMeta: kapi.ObjectMeta{Name: "test-node"}})
36
+	if err != nil {
37
+		t.Fatalf("expected no errors creating a node: %#v", err)
38
+	}
39
+	result := nodeDiag.Check()
40
+	if errors := result.Errors(); len(errors) != 1 ||
41
+		!diagtype.MatchesDiagError(errors[0], "clNoAvailNodes") {
42
+		t.Fatalf("expected 1 error about not having nodes, not: %#v", errors)
43
+	} else if warnings := result.Warnings(); len(warnings) < 1 || !diagtype.MatchesDiagError(warnings[0], "clNodeNotReady") {
44
+		t.Fatalf("expected a warning about test-node not being ready, not: %#v", warnings)
45
+	}
46
+
47
+	_ = node
48
+	/*
49
+		// Put the new node in Ready state and verify the diagnostic is clean
50
+		if _, err := client.Nodes().UpdateStatus(node); err != nil {
51
+			t.Fatalf("expected no errors updating node status, but: %#v", err)
52
+		}
53
+		result = nodeDiag.Check()
54
+		if warnings := result.Warnings(); len(warnings) > 0 {
55
+			t.Fatalf("expected no warning with one node ready, but: %#v", warnings)
56
+		} else if errors := result.Errors(); len(errors) > 0 {
57
+			t.Fatalf("expected no errors with one node ready, but: %#v", errors)
58
+		}
59
+
60
+		// Make the node unschedulable and verify diagnostics notices
61
+		node.Spec.Unschedulable = true
62
+		if _, err := client.Nodes().Update(node); err != nil {
63
+			t.Fatalf("expected no errors making node unschedulable, but: %#v", err)
64
+		}
65
+		if errors := result.Errors(); len(errors) != 1 ||
66
+			!diagtype.MatchesDiagError(errors[0], "clNoAvailNodes") {
67
+			t.Fatalf("expected 1 error about not having nodes, but: %#v", errors)
68
+		} else if warnings := result.Warnings(); len(warnings) < 1 || !diagtype.MatchesDiagError(warnings[0], "clNodeNotSched") {
69
+			t.Fatalf("expected a warning about test-node not being schedulable, but: %#v", warnings)
70
+		}
71
+	*/
72
+}