The master/node/client subcommands went away in favor of client/cluster/host diagnostic builders.
Diagnostic logging was completely refactored.
Diagnostics now return a result object instead of logging
directly, so that they can be run in parallel (though they are not yet).
Updated the help accordingly.
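For reviewers, the contract the new builders produce against looks roughly like the sketch below. This is reconstructed from the call sites in the diff (Name/Description/CanRun/Check, plus a result carrying Logs/Warnings/Errors); the real definitions live in pkg/diagnostics/types and pkg/diagnostics/log, and the LogEntry stand-in here is an assumption:

```go
// Sketch only -- reconstructed from call sites in this diff, not the
// actual source of pkg/diagnostics/types.
package types

// LogEntry is an assumed stand-in for whatever o.Logger.LogEntry consumes.
type LogEntry interface{}

// DiagnosticError is assumed to be an error with extra context attached.
type DiagnosticError interface {
	error
}

// DiagnosticResult accumulates findings instead of writing to a shared
// logger, which is what makes independent (eventually parallel) execution
// possible; the runner flushes Logs() afterward and tallies the counts.
type DiagnosticResult interface {
	Logs() []LogEntry
	Warnings() []DiagnosticError
	Errors() []DiagnosticError
}

// Diagnostic is what each client/cluster/host builder returns a slice of.
type Diagnostic interface {
	Name() string
	Description() string
	CanRun() (canRun bool, reason error) // non-nil reason explains a skip
	Check() DiagnosticResult
}
```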
| ... | ... |
@@ -2,135 +2,44 @@ package diagnostics |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"fmt" |
| 5 |
- "io" |
|
| 6 |
- "os" |
|
| 7 | 5 |
|
| 8 |
- "github.com/spf13/cobra" |
|
| 9 |
- |
|
| 10 |
- kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client" |
|
| 11 |
- kclientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
| 12 |
- kcmdutil "github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util" |
|
| 6 |
+ clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
| 13 | 7 |
"github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
| 14 | 8 |
|
| 15 |
- diagnosticflags "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
| 16 |
- osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
| 17 | 9 |
clientdiagnostics "github.com/openshift/origin/pkg/diagnostics/client" |
| 18 |
- "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 19 |
- diagnostictypes "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
| 10 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 20 | 11 |
) |
| 21 | 12 |
|
| 22 |
-const ClientDiagnosticsRecommendedName = "client" |
|
| 13 |
+const ( |
|
| 14 |
+ ConfigContexts = "ConfigContexts" |
|
| 15 |
+) |
|
| 23 | 16 |
|
| 24 | 17 |
var ( |
| 25 |
- AvailableClientDiagnostics = util.NewStringSet("ConfigContexts", "NodeDefinitions")
|
|
| 18 |
+ AvailableClientDiagnostics = util.NewStringSet(ConfigContexts) // add more diagnostics as they are defined |
|
| 26 | 19 |
) |
| 27 | 20 |
|
| 28 |
-// user options for openshift-diagnostics client command |
|
| 29 |
-type ClientDiagnosticsOptions struct {
|
|
| 30 |
- RequestedDiagnostics util.StringList |
|
| 31 |
- |
|
| 32 |
- KubeClient *kclient.Client |
|
| 33 |
- KubeConfig *kclientcmdapi.Config |
|
| 34 |
- |
|
| 35 |
- LogOptions *log.LoggerOptions |
|
| 36 |
- Logger *log.Logger |
|
| 37 |
-} |
|
| 21 |
+func (o DiagnosticsOptions) buildClientDiagnostics(rawConfig *clientcmdapi.Config) ([]types.Diagnostic, bool /* ok */, error) {
|
|
| 38 | 22 |
|
| 39 |
-const longClientDescription = ` |
|
| 40 |
-OpenShift Diagnostics |
|
| 41 |
- |
|
| 42 |
-This command helps you understand and troubleshoot OpenShift as a user. It is |
|
| 43 |
-intended to be run from the same context as an OpenShift client |
|
| 44 |
-("openshift cli" or "osc") and with the same configuration options.
|
|
| 45 |
- |
|
| 46 |
- $ %s |
|
| 47 |
-` |
|
| 48 |
- |
|
| 49 |
-func NewClientCommand(name string, fullName string, out io.Writer) *cobra.Command {
|
|
| 50 |
- o := &ClientDiagnosticsOptions{
|
|
| 51 |
- RequestedDiagnostics: AvailableClientDiagnostics.List(), |
|
| 52 |
- LogOptions: &log.LoggerOptions{Out: out},
|
|
| 23 |
+ osClient, kubeClient, clientErr := o.Factory.Clients() |
|
| 24 |
+ _ = osClient // remove once a diagnostic makes use of OpenShift client |
|
| 25 |
+ _ = kubeClient // remove once a diagnostic makes use of kube client |
|
| 26 |
+ if clientErr != nil {
|
|
| 27 |
+ o.Logger.Notice("clLoadDefaultFailed", "Failed creating client from config; client diagnostics will be limited to config testing")
|
|
| 28 |
+ AvailableClientDiagnostics = util.NewStringSet(ConfigContexts) |
|
| 53 | 29 |
} |
| 54 | 30 |
|
| 55 |
- var factory *osclientcmd.Factory |
|
| 56 |
- |
|
| 57 |
- cmd := &cobra.Command{
|
|
| 58 |
- Use: name, |
|
| 59 |
- Short: "Troubleshoot using the OpenShift v3 client.", |
|
| 60 |
- Long: fmt.Sprintf(longClientDescription, fullName), |
|
| 61 |
- Run: func(c *cobra.Command, args []string) {
|
|
| 62 |
- kcmdutil.CheckErr(o.Complete()) |
|
| 63 |
- |
|
| 64 |
- _, kubeClient, err := factory.Clients() |
|
| 65 |
- kcmdutil.CheckErr(err) |
|
| 66 |
- |
|
| 67 |
- kubeConfig, err := factory.OpenShiftClientConfig.RawConfig() |
|
| 68 |
- kcmdutil.CheckErr(err) |
|
| 69 |
- |
|
| 70 |
- o.KubeClient = kubeClient |
|
| 71 |
- o.KubeConfig = &kubeConfig |
|
| 72 |
- |
|
| 73 |
- failed, err := o.RunDiagnostics() |
|
| 74 |
- o.Logger.Summary() |
|
| 75 |
- o.Logger.Finish() |
|
| 76 |
- |
|
| 77 |
- kcmdutil.CheckErr(err) |
|
| 78 |
- if failed {
|
|
| 79 |
- os.Exit(255) |
|
| 80 |
- } |
|
| 81 |
- |
|
| 82 |
- }, |
|
| 83 |
- } |
|
| 84 |
- cmd.SetOutput(out) // for output re: usage / help |
|
| 85 |
- factory = osclientcmd.New(cmd.Flags()) // side effect: add standard persistent flags for openshift client |
|
| 86 |
- diagnosticflags.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, diagnosticflags.RecommendedLoggerOptionFlags()) |
|
| 87 |
- diagnosticflags.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, diagnosticflags.NewRecommendedDiagnosticFlag()) |
|
| 88 |
- |
|
| 89 |
- return cmd |
|
| 90 |
-} |
|
| 91 |
- |
|
| 92 |
-func (o *ClientDiagnosticsOptions) Complete() error {
|
|
| 93 |
- var err error |
|
| 94 |
- o.Logger, err = o.LogOptions.NewLogger() |
|
| 95 |
- if err != nil {
|
|
| 96 |
- return err |
|
| 97 |
- } |
|
| 98 |
- |
|
| 99 |
- return nil |
|
| 100 |
-} |
|
| 101 |
- |
|
| 102 |
-func (o ClientDiagnosticsOptions) RunDiagnostics() (bool, error) {
|
|
| 103 |
- diagnostics := map[string]diagnostictypes.Diagnostic{}
|
|
| 104 |
- |
|
| 105 |
- for _, diagnosticName := range o.RequestedDiagnostics {
|
|
| 31 |
+ diagnostics := []types.Diagnostic{}
|
|
| 32 |
+ requestedDiagnostics := intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableClientDiagnostics).List() |
|
| 33 |
+ for _, diagnosticName := range requestedDiagnostics {
|
|
| 106 | 34 |
switch diagnosticName {
|
| 107 |
- case "ConfigContexts": |
|
| 108 |
- for contextName, _ := range o.KubeConfig.Contexts {
|
|
| 109 |
- diagnostics[diagnosticName+"["+contextName+"]"] = clientdiagnostics.ConfigContext{o.KubeConfig, contextName, o.Logger}
|
|
| 35 |
+ case ConfigContexts: |
|
| 36 |
+ for contextName := range rawConfig.Contexts {
|
|
| 37 |
+ diagnostics = append(diagnostics, clientdiagnostics.ConfigContext{rawConfig, contextName})
|
|
| 110 | 38 |
} |
| 111 | 39 |
|
| 112 |
- case "NodeDefinitions": |
|
| 113 |
- diagnostics[diagnosticName] = clientdiagnostics.NodeDefinition{o.KubeClient, o.Logger}
|
|
| 114 |
- |
|
| 115 | 40 |
default: |
| 116 |
- return false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
|
|
| 117 |
- } |
|
| 118 |
- } |
|
| 119 |
- |
|
| 120 |
- for name, diagnostic := range diagnostics {
|
|
| 121 |
- |
|
| 122 |
- if canRun, reason := diagnostic.CanRun(); !canRun {
|
|
| 123 |
- if reason == nil {
|
|
| 124 |
- o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "client", "name": name, "diag": diagnostic.Description()}})
|
|
| 125 |
- } else {
|
|
| 126 |
- o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}", TemplateData: map[string]string{"area": "client", "name": name, "diag": diagnostic.Description(), "reason": reason.Error()}})
|
|
| 127 |
- } |
|
| 128 |
- continue |
|
| 41 |
+ return nil, false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
|
|
| 129 | 42 |
} |
| 130 |
- |
|
| 131 |
- o.Logger.Noticem(log.Message{ID: "diagRun", Template: "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "client", "name": name, "diag": diagnostic.Description()}})
|
|
| 132 |
- diagnostic.Check() |
|
| 133 | 43 |
} |
| 134 |
- |
|
| 135 |
- return o.Logger.ErrorsSeen(), nil |
|
| 44 |
+ return diagnostics, true, clientErr |
|
| 136 | 45 |
} |
| 137 | 46 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,111 @@ |
| 0 |
+package diagnostics |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "strings" |
|
| 5 |
+ |
|
| 6 |
+ kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client" |
|
| 7 |
+ clientcmd "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd" |
|
| 8 |
+ clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
| 9 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
|
| 10 |
+ |
|
| 11 |
+ authorizationapi "github.com/openshift/origin/pkg/authorization/api" |
|
| 12 |
+ "github.com/openshift/origin/pkg/client" |
|
| 13 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
| 14 |
+ |
|
| 15 |
+ clustdiags "github.com/openshift/origin/pkg/diagnostics/cluster" |
|
| 16 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 17 |
+) |
|
| 18 |
+ |
|
| 19 |
+var ( |
|
| 20 |
+ AvailableClusterDiagnostics = util.NewStringSet("NodeDefinitions")
|
|
| 21 |
+) |
|
| 22 |
+ |
|
| 23 |
+func (o DiagnosticsOptions) buildClusterDiagnostics(rawConfig *clientcmdapi.Config) ([]types.Diagnostic, bool /* ok */, error) {
|
|
| 24 |
+ requestedDiagnostics := intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableClusterDiagnostics).List() |
|
| 25 |
+ if len(requestedDiagnostics) == 0 { // no diagnostics to run here
|
|
| 26 |
+ return nil, true, nil // don't waste time on discovery |
|
| 27 |
+ } |
|
| 28 |
+ |
|
| 29 |
+ var clusterClient *client.Client |
|
| 30 |
+ var kclusterClient *kclient.Client |
|
| 31 |
+ |
|
| 32 |
+ clusterClient, kclusterClient, found, err := o.findClusterClients(rawConfig) |
|
| 33 |
+ if !found {
|
|
| 34 |
+ o.Logger.Notice("noClustCtx", "No cluster-admin client config found; skipping cluster diagnostics.")
|
|
| 35 |
+ return nil, false, err |
|
| 36 |
+ } |
|
| 37 |
+ |
|
| 38 |
+ diagnostics := []types.Diagnostic{}
|
|
| 39 |
+ for _, diagnosticName := range requestedDiagnostics {
|
|
| 40 |
+ switch diagnosticName {
|
|
| 41 |
+ case "NodeDefinitions": |
|
| 42 |
+ diagnostics = append(diagnostics, clustdiags.NodeDefinitions{kclusterClient})
|
|
| 43 |
+ |
|
| 44 |
+ default: |
|
| 45 |
+ return nil, false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
|
|
| 46 |
+ } |
|
| 47 |
+ } |
|
| 48 |
+ return diagnostics, true, nil |
|
| 49 |
+} |
|
| 50 |
+ |
|
| 51 |
+func (o DiagnosticsOptions) findClusterClients(rawConfig *clientcmdapi.Config) (*client.Client, *kclient.Client, bool, error) {
|
|
| 52 |
+ if o.ClientClusterContext != "" { // user has specified cluster context to use
|
|
| 53 |
+ if context, exists := rawConfig.Contexts[o.ClientClusterContext]; exists {
|
|
| 54 |
+ configErr := fmt.Errorf("Specified '%s' as cluster-admin context, but it was not found in your client configuration.", o.ClientClusterContext)
|
|
| 55 |
+ o.Logger.Error("discClustCtx", configErr.Error())
|
|
| 56 |
+ return nil, nil, false, configErr |
|
| 57 |
+ } else if os, kube, found, err := o.makeClusterClients(rawConfig, o.ClientClusterContext, context); found {
|
|
| 58 |
+ return os, kube, true, err |
|
| 59 |
+ } else {
|
|
| 60 |
+ return nil, nil, false, err |
|
| 61 |
+ } |
|
| 62 |
+ } |
|
| 63 |
+ currentContext, exists := rawConfig.Contexts[rawConfig.CurrentContext] |
|
| 64 |
+ if !exists { // config specified cluster admin context that doesn't exist; complain and quit
|
|
| 65 |
+ configErr := fmt.Errorf("Current context '%s' not found in client configuration; will not attempt cluster diagnostics.", rawConfig.CurrentContext)
|
|
| 66 |
+ o.Logger.Errorf("discClustCtx", configErr.Error())
|
|
| 67 |
+ return nil, nil, false, configErr |
|
| 68 |
+ } |
|
| 69 |
+ // check if current context is already cluster admin |
|
| 70 |
+ if os, kube, found, err := o.makeClusterClients(rawConfig, rawConfig.CurrentContext, currentContext); found {
|
|
| 71 |
+ return os, kube, true, err |
|
| 72 |
+ } |
|
| 73 |
+ // otherwise, for convenience, search for a context with the same server but with the system:admin user |
|
| 74 |
+ for name, context := range rawConfig.Contexts {
|
|
| 75 |
+ if context.Cluster == currentContext.Cluster && name != rawConfig.CurrentContext && strings.HasPrefix(context.AuthInfo, "system:admin/") {
|
|
| 76 |
+ if os, kube, found, err := o.makeClusterClients(rawConfig, name, context); found {
|
|
| 77 |
+ return os, kube, true, err |
|
| 78 |
+ } else {
|
|
| 79 |
+ return nil, nil, false, err // don't try more than one such context, they'll probably fail the same |
|
| 80 |
+ } |
|
| 81 |
+ } |
|
| 82 |
+ } |
|
| 83 |
+ return nil, nil, false, nil |
|
| 84 |
+} |
|
| 85 |
+ |
|
| 86 |
+func (o DiagnosticsOptions) makeClusterClients(rawConfig *clientcmdapi.Config, contextName string, context *clientcmdapi.Context) (*client.Client, *kclient.Client, bool, error) {
|
|
| 87 |
+ overrides := &clientcmd.ConfigOverrides{Context: *context}
|
|
| 88 |
+ clientConfig := clientcmd.NewDefaultClientConfig(*rawConfig, overrides) |
|
| 89 |
+ factory := osclientcmd.NewFactory(clientConfig) |
|
| 90 |
+ o.Logger.Debugf("discClustCtxStart", "Checking if context is cluster-admin: '%s'", contextName)
|
|
| 91 |
+ if osClient, kubeClient, err := factory.Clients(); err != nil {
|
|
| 92 |
+ o.Logger.Debugf("discClustCtx", "Error creating client for context '%s':\n%v", contextName, err)
|
|
| 93 |
+ return nil, nil, false, nil |
|
| 94 |
+ } else {
|
|
| 95 |
+ subjectAccessReview := authorizationapi.SubjectAccessReview{
|
|
| 96 |
+ // we assume if you can list nodes, you're the cluster admin. |
|
| 97 |
+ Verb: "list", |
|
| 98 |
+ Resource: "nodes", |
|
| 99 |
+ } |
|
| 100 |
+ if resp, err := osClient.SubjectAccessReviews("default").Create(&subjectAccessReview); err != nil {
|
|
| 101 |
+ o.Logger.Errorf("discClustCtx", "Error testing cluster-admin access for context '%s':\n%v", contextName, err)
|
|
| 102 |
+ return nil, nil, false, err |
|
| 103 |
+ } else if resp.Allowed {
|
|
| 104 |
+ o.Logger.Infof("discClustCtxFound", "Using context for cluster-admin access: '%s'", contextName)
|
|
| 105 |
+ return osClient, kubeClient, true, nil |
|
| 106 |
+ } |
|
| 107 |
+ } |
|
| 108 |
+ o.Logger.Debugf("discClustCtx", "Context does not have cluster-admin access: '%s'", contextName)
|
|
| 109 |
+ return nil, nil, false, nil |
|
| 110 |
+} |
| 0 | 111 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,29 @@ |
| 0 |
+package diagnostics |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
| 4 |
+ "github.com/openshift/origin/pkg/cmd/cli/config" |
|
| 5 |
+ |
|
| 6 |
+ clientdiagnostics "github.com/openshift/origin/pkg/diagnostics/client" |
|
| 7 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 8 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 9 |
+) |
|
| 10 |
+ |
|
| 11 |
+func (o DiagnosticsOptions) detectClientConfig() (bool, []types.DiagnosticError, []types.DiagnosticError) {
|
|
| 12 |
+ diagnostic := &clientdiagnostics.ConfigLoading{ConfFlagName: config.OpenShiftConfigFlagName, ClientFlags: o.ClientFlags}
|
|
| 13 |
+ o.Logger.Noticet("diagRun", "Determining if client configuration exists for client/cluster diagnostics",
|
|
| 14 |
+ log.Hash{"area": "client", "name": diagnostic.Name(), "diag": diagnostic.Description()})
|
|
| 15 |
+ result := diagnostic.Check() |
|
| 16 |
+ for _, entry := range result.Logs() {
|
|
| 17 |
+ o.Logger.LogEntry(entry) |
|
| 18 |
+ } |
|
| 19 |
+ return diagnostic.SuccessfulLoad(), result.Warnings(), result.Errors() |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+func (o DiagnosticsOptions) buildRawConfig() (*clientcmdapi.Config, error) {
|
|
| 23 |
+ kubeConfig, configErr := o.Factory.OpenShiftClientConfig.RawConfig() |
|
| 24 |
+ if len(kubeConfig.Contexts) == 0 {
|
|
| 25 |
+ return nil, configErr |
|
| 26 |
+ } |
|
| 27 |
+ return &kubeConfig, configErr |
|
| 28 |
+} |
| ... | ... |
@@ -2,89 +2,80 @@ package diagnostics |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"fmt" |
| 5 |
+ "github.com/spf13/cobra" |
|
| 6 |
+ flag "github.com/spf13/pflag" |
|
| 5 | 7 |
"io" |
| 6 | 8 |
"os" |
| 7 | 9 |
|
| 8 |
- "github.com/spf13/cobra" |
|
| 9 |
- |
|
| 10 | 10 |
kcmdutil "github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util" |
| 11 | 11 |
"github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
| 12 | 12 |
kutilerrors "github.com/GoogleCloudPlatform/kubernetes/pkg/util/errors" |
| 13 |
- |
|
| 14 |
- diagnosticflags "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
| 15 |
- "github.com/openshift/origin/pkg/cmd/templates" |
|
| 13 |
+ "github.com/openshift/origin/pkg/cmd/cli/config" |
|
| 14 |
+ "github.com/openshift/origin/pkg/cmd/flagtypes" |
|
| 16 | 15 |
osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
| 17 |
- "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 18 |
-) |
|
| 19 | 16 |
|
| 20 |
-var ( |
|
| 21 |
- AvailableOverallDiagnostics = util.NewStringSet() |
|
| 17 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
| 18 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 19 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 22 | 20 |
) |
| 23 | 21 |
|
| 24 |
-func init() {
|
|
| 25 |
- AvailableOverallDiagnostics.Insert(AvailableClientDiagnostics.List()...) |
|
| 26 |
- AvailableOverallDiagnostics.Insert(AvailableMasterDiagnostics.List()...) |
|
| 27 |
- AvailableOverallDiagnostics.Insert(AvailableNodeDiagnostics.List()...) |
|
| 28 |
-} |
|
| 29 |
- |
|
| 30 |
-type OverallDiagnosticsOptions struct {
|
|
| 22 |
+type DiagnosticsOptions struct {
|
|
| 31 | 23 |
RequestedDiagnostics util.StringList |
| 32 | 24 |
|
| 33 | 25 |
MasterConfigLocation string |
| 34 | 26 |
NodeConfigLocation string |
| 27 |
+ ClientClusterContext string |
|
| 28 |
+ IsHost bool |
|
| 35 | 29 |
|
| 36 |
- Factory *osclientcmd.Factory |
|
| 30 |
+ ClientFlags *flag.FlagSet |
|
| 31 |
+ Factory *osclientcmd.Factory |
|
| 37 | 32 |
|
| 38 | 33 |
LogOptions *log.LoggerOptions |
| 39 | 34 |
Logger *log.Logger |
| 40 | 35 |
} |
| 41 | 36 |
|
| 42 |
-const longAllDescription = ` |
|
| 37 |
+const longDescription = ` |
|
| 43 | 38 |
OpenShift Diagnostics |
| 44 | 39 |
|
| 45 |
-This command helps you understand and troubleshoot OpenShift. It is |
|
| 46 |
-intended to be run from the same context as an OpenShift client or running |
|
| 47 |
-master / node in order to troubleshoot from the perspective of each. |
|
| 40 |
+This command helps you understand and troubleshoot OpenShift. It runs |
|
| 41 |
+diagnostics against an OpenShift cluster as with a client and/or the |
|
| 42 |
+state of a running master / node host. |
|
| 48 | 43 |
|
| 49 | 44 |
$ %[1]s |
| 50 | 45 |
|
| 51 |
-If run without flags or subcommands, it will check for config files for |
|
| 52 |
-client, master, and node, and if found, use them for troubleshooting |
|
| 53 |
-those components. If master/node config files are not found, the tool |
|
| 54 |
-assumes they are not present and does diagnostics only as a client. |
|
| 55 |
- |
|
| 56 |
-You may also specify config files explicitly with flags below, in which |
|
| 57 |
-case you will receive an error if they are invalid or not found. |
|
| 46 |
+If run without flags, it will check for standard config files for |
|
| 47 |
+client, master, and node, and if found, use them for diagnostics. |
|
| 48 |
+You may also specify config files explicitly with flags, in which case |
|
| 49 |
+you will receive an error if they are not found. For example: |
|
| 58 | 50 |
|
| 59 | 51 |
$ %[1]s --master-config=/etc/openshift/master/master-config.yaml |
| 60 | 52 |
|
| 61 |
-Subcommands may be used to scope the troubleshooting to a particular |
|
| 62 |
-component and are not limited to using config files; you can and should |
|
| 63 |
-use the same flags that are actually set on the command line for that |
|
| 64 |
-component to configure the diagnostic. |
|
| 53 |
+* If master/node config files are not found and the --host flag is not |
|
| 54 |
+ present, host diagnostics are skipped. |
|
| 55 |
+* If the client has cluster-admin access, this access enables cluster |
|
| 56 |
+ diagnostics to run which regular users cannot. |
|
| 57 |
+* If a client config file is not found, client and cluster diagnostics |
|
| 58 |
+ are skipped. |
|
| 65 | 59 |
|
| 66 |
- $ %[1]s node --hostname='node.example.com' --kubeconfig=... |
|
| 67 |
- |
|
| 68 |
-NOTE: This is an alpha version of diagnostics and will change significantly. |
|
| 69 |
-NOTE: Global flags (from the 'options' subcommand) are ignored here but |
|
| 70 |
-can be used with subcommands. |
|
| 60 |
+NOTE: This is a beta version of diagnostics and may still evolve in a |
|
| 61 |
+different direction. |
|
| 71 | 62 |
` |
| 72 | 63 |
|
| 73 | 64 |
func NewCommandDiagnostics(name string, fullName string, out io.Writer) *cobra.Command {
|
| 74 |
- o := &OverallDiagnosticsOptions{
|
|
| 75 |
- RequestedDiagnostics: AvailableOverallDiagnostics.List(), |
|
| 65 |
+ o := &DiagnosticsOptions{
|
|
| 66 |
+ RequestedDiagnostics: util.StringList{},
|
|
| 76 | 67 |
LogOptions: &log.LoggerOptions{Out: out},
|
| 77 | 68 |
} |
| 78 | 69 |
|
| 79 | 70 |
cmd := &cobra.Command{
|
| 80 | 71 |
Use: name, |
| 81 | 72 |
Short: "This utility helps you understand and troubleshoot OpenShift v3.", |
| 82 |
- Long: fmt.Sprintf(longAllDescription, fullName), |
|
| 73 |
+ Long: fmt.Sprintf(longDescription, fullName), |
|
| 83 | 74 |
Run: func(c *cobra.Command, args []string) {
|
| 84 | 75 |
kcmdutil.CheckErr(o.Complete()) |
| 85 | 76 |
|
| 86 |
- failed, err := o.RunDiagnostics() |
|
| 87 |
- o.Logger.Summary() |
|
| 77 |
+ failed, err, warnCount, errorCount := o.RunDiagnostics() |
|
| 78 |
+ o.Logger.Summary(warnCount, errorCount) |
|
| 88 | 79 |
o.Logger.Finish() |
| 89 | 80 |
|
| 90 | 81 |
kcmdutil.CheckErr(err) |
| ... | ... |
@@ -96,21 +87,22 @@ func NewCommandDiagnostics(name string, fullName string, out io.Writer) *cobra.C |
| 96 | 96 |
} |
| 97 | 97 |
cmd.SetOutput(out) // for output re: usage / help |
| 98 | 98 |
|
| 99 |
- o.Factory = osclientcmd.New(cmd.Flags()) // side effect: add standard persistent flags for openshift client |
|
| 100 |
- cmd.Flags().StringVar(&o.MasterConfigLocation, "master-config", "", "path to master config file") |
|
| 101 |
- cmd.Flags().StringVar(&o.NodeConfigLocation, "node-config", "", "path to node config file") |
|
| 102 |
- diagnosticflags.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, diagnosticflags.RecommendedLoggerOptionFlags()) |
|
| 103 |
- diagnosticflags.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, diagnosticflags.NewRecommendedDiagnosticFlag()) |
|
| 104 |
- |
|
| 105 |
- cmd.AddCommand(NewClientCommand(ClientDiagnosticsRecommendedName, name+" "+ClientDiagnosticsRecommendedName, out)) |
|
| 106 |
- cmd.AddCommand(NewMasterCommand(MasterDiagnosticsRecommendedName, name+" "+MasterDiagnosticsRecommendedName, out)) |
|
| 107 |
- cmd.AddCommand(NewNodeCommand(NodeDiagnosticsRecommendedName, name+" "+NodeDiagnosticsRecommendedName, out)) |
|
| 108 |
- cmd.AddCommand(NewOptionsCommand()) |
|
| 99 |
+ o.ClientFlags = flag.NewFlagSet("client", flag.ContinueOnError) // hide the extensive set of client flags
|
|
| 100 |
+ o.Factory = osclientcmd.New(o.ClientFlags) // that would otherwise be added to this command |
|
| 101 |
+ cmd.Flags().AddFlag(o.ClientFlags.Lookup(config.OpenShiftConfigFlagName)) |
|
| 102 |
+ cmd.Flags().AddFlag(o.ClientFlags.Lookup("context")) // TODO: find k8s constant
|
|
| 103 |
+ cmd.Flags().StringVar(&o.ClientClusterContext, options.FlagClusterContextName, "", "client context to use for cluster administrator") |
|
| 104 |
+ cmd.Flags().StringVar(&o.MasterConfigLocation, options.FlagMasterConfigName, "", "path to master config file (implies --host)") |
|
| 105 |
+ cmd.Flags().StringVar(&o.NodeConfigLocation, options.FlagNodeConfigName, "", "path to node config file (implies --host)") |
|
| 106 |
+ cmd.Flags().BoolVar(&o.IsHost, options.FlagIsHostName, false, "look for systemd and journald units even without master/node config") |
|
| 107 |
+ flagtypes.GLog(cmd.Flags()) |
|
| 108 |
+ options.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, options.RecommendedLoggerOptionFlags()) |
|
| 109 |
+ options.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, options.NewRecommendedDiagnosticFlag()) |
|
| 109 | 110 |
|
| 110 | 111 |
return cmd |
| 111 | 112 |
} |
| 112 | 113 |
|
| 113 |
-func (o *OverallDiagnosticsOptions) Complete() error {
|
|
| 114 |
+func (o *DiagnosticsOptions) Complete() error {
|
|
| 114 | 115 |
var err error |
| 115 | 116 |
o.Logger, err = o.LogOptions.NewLogger() |
| 116 | 117 |
if err != nil {
|
| ... | ... |
@@ -120,112 +112,129 @@ func (o *OverallDiagnosticsOptions) Complete() error {
|
| 120 | 120 |
return nil |
| 121 | 121 |
} |
| 122 | 122 |
|
| 123 |
-func (o OverallDiagnosticsOptions) RunDiagnostics() (bool, error) {
|
|
| 123 |
+func (o DiagnosticsOptions) RunDiagnostics() (bool, error, int, int) {
|
|
| 124 | 124 |
failed := false |
| 125 |
+ warnings := []error{}
|
|
| 125 | 126 |
errors := []error{}
|
| 126 |
- |
|
| 127 |
- masterFailed, err := o.CheckMaster() |
|
| 128 |
- failed = failed && masterFailed |
|
| 129 |
- if err != nil {
|
|
| 130 |
- errors = append(errors, err) |
|
| 131 |
- } |
|
| 132 |
- |
|
| 133 |
- nodeFailed, err := o.CheckNode() |
|
| 134 |
- failed = failed && nodeFailed |
|
| 135 |
- if err != nil {
|
|
| 136 |
- errors = append(errors, err) |
|
| 137 |
- } |
|
| 138 |
- |
|
| 139 |
- clientFailed, err := o.CheckClient() |
|
| 140 |
- failed = failed && clientFailed |
|
| 141 |
- if err != nil {
|
|
| 142 |
- errors = append(errors, err) |
|
| 143 |
- } |
|
| 144 |
- |
|
| 145 |
- return failed, kutilerrors.NewAggregate(errors) |
|
| 146 |
-} |
|
| 147 |
- |
|
| 148 |
-func (o OverallDiagnosticsOptions) CheckClient() (bool, error) {
|
|
| 149 |
- runClientChecks := true |
|
| 150 |
- |
|
| 151 |
- _, kubeClient, err := o.Factory.Clients() |
|
| 152 |
- if err != nil {
|
|
| 153 |
- runClientChecks = false |
|
| 127 |
+ diagnostics := map[string][]types.Diagnostic{}
|
|
| 128 |
+ AvailableDiagnostics := util.NewStringSet() |
|
| 129 |
+ AvailableDiagnostics.Insert(AvailableClientDiagnostics.List()...) |
|
| 130 |
+ AvailableDiagnostics.Insert(AvailableClusterDiagnostics.List()...) |
|
| 131 |
+ AvailableDiagnostics.Insert(AvailableHostDiagnostics.List()...) |
|
| 132 |
+ if len(o.RequestedDiagnostics) == 0 {
|
|
| 133 |
+ o.RequestedDiagnostics = AvailableDiagnostics.List() |
|
| 134 |
+ } else if common := intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableDiagnostics); len(common) == 0 {
|
|
| 135 |
+ o.Logger.Errort("emptyReqDiag", "None of the requested diagnostics are available:\n {{.requested}}\nPlease try from the following:\n {{.available}}",
|
|
| 136 |
+ log.Hash{"requested": o.RequestedDiagnostics, "available": AvailableDiagnostics.List()})
|
|
| 137 |
+ return false, fmt.Errorf("No requested diagnostics available"), 0, 1
|
|
| 138 |
+ } else if len(common) < len(o.RequestedDiagnostics) {
|
|
| 139 |
+ errors = append(errors, fmt.Errorf("Not all requested diagnostics are available"))
|
|
| 140 |
+ o.Logger.Errort("notAllReqDiag", `
|
|
| 141 |
+Of the requested diagnostics: |
|
| 142 |
+ {{.requested}}
|
|
| 143 |
+only these are available: |
|
| 144 |
+ {{.common}}
|
|
| 145 |
+The list of all possible is: |
|
| 146 |
+ {{.available}}
|
|
| 147 |
+ `, log.Hash{"requested": o.RequestedDiagnostics, "common": common.List(), "available": AvailableDiagnostics.List()})
|
|
| 154 | 148 |
} |
| 155 | 149 |
|
| 156 |
- kubeConfig, err := o.Factory.OpenShiftClientConfig.RawConfig() |
|
| 157 |
- if err != nil {
|
|
| 158 |
- runClientChecks = false |
|
| 159 |
- } |
|
| 160 |
- |
|
| 161 |
- if runClientChecks {
|
|
| 162 |
- clientDiagnosticOptions := &ClientDiagnosticsOptions{
|
|
| 163 |
- RequestedDiagnostics: intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableClientDiagnostics).List(), |
|
| 164 |
- KubeClient: kubeClient, |
|
| 165 |
- KubeConfig: &kubeConfig, |
|
| 166 |
- LogOptions: o.LogOptions, |
|
| 167 |
- Logger: o.Logger, |
|
| 150 |
+ func() { // don't trust discovery/build of diagnostics; wrap panic nicely in case of developer error
|
|
| 151 |
+ defer func() {
|
|
| 152 |
+ if r := recover(); r != nil {
|
|
| 153 |
+ failed = true |
|
| 154 |
+ errors = append(errors, fmt.Errorf("While building the diagnostics, a panic was encountered.\nThis is a bug in diagnostics. Stack trace follows : \n%v", r))
|
|
| 155 |
+ } |
|
| 156 |
+ }() |
|
| 157 |
+ detected, detectWarnings, detectErrors := o.detectClientConfig() // may log and return problems |
|
| 158 |
+ for _, warn := range detectWarnings {
|
|
| 159 |
+ warnings = append(warnings, warn) |
|
| 168 | 160 |
} |
| 161 |
+ for _, err := range detectErrors {
|
|
| 162 |
+ errors = append(errors, err) |
|
| 163 |
+ } |
|
| 164 |
+ if !detected { // there just plain isn't any client config file available
|
|
| 165 |
+ o.Logger.Notice("discNoClientConf", "No client configuration specified; skipping client and cluster diagnostics.")
|
|
| 166 |
+ } else if rawConfig, err := o.buildRawConfig(); rawConfig == nil { // client config is totally broken - won't parse etc (problems may have been detected and logged)
|
|
| 167 |
+ o.Logger.Errorf("discBrokenClientConf", "Client configuration failed to load; skipping client and cluster diagnostics due to error: {{.error}}", log.Hash{"error": err.Error()})
|
|
| 168 |
+ errors = append(errors, err) |
|
| 169 |
+ } else {
|
|
| 170 |
+ if err != nil { // error encountered, proceed with caution
|
|
| 171 |
+ o.Logger.Errorf("discClientConfErr", "Client configuration loading encountered an error, but proceeding anyway. Error was:\n{{.error}}", log.Hash{"error": err.Error()})
|
|
| 172 |
+ errors = append(errors, err) |
|
| 173 |
+ } |
|
| 174 |
+ if clientDiags, ok, err := o.buildClientDiagnostics(rawConfig); ok {
|
|
| 175 |
+ diagnostics["client"] = clientDiags |
|
| 176 |
+ } else if err != nil {
|
|
| 177 |
+ failed = true |
|
| 178 |
+ errors = append(errors, err) |
|
| 179 |
+ } |
|
| 169 | 180 |
|
| 170 |
- return clientDiagnosticOptions.RunDiagnostics() |
|
| 171 |
- } |
|
| 172 |
- |
|
| 173 |
- return false, nil |
|
| 174 |
-} |
|
| 175 |
- |
|
| 176 |
-func (o OverallDiagnosticsOptions) CheckNode() (bool, error) {
|
|
| 177 |
- if len(o.NodeConfigLocation) == 0 {
|
|
| 178 |
- if _, err := os.Stat(StandardNodeConfigPath); !os.IsNotExist(err) {
|
|
| 179 |
- o.NodeConfigLocation = StandardNodeConfigPath |
|
| 181 |
+ if clusterDiags, ok, err := o.buildClusterDiagnostics(rawConfig); ok {
|
|
| 182 |
+ diagnostics["cluster"] = clusterDiags |
|
| 183 |
+ } else if err != nil {
|
|
| 184 |
+ failed = true |
|
| 185 |
+ errors = append(errors, err) |
|
| 186 |
+ } |
|
| 180 | 187 |
} |
| 181 |
- } |
|
| 182 | 188 |
|
| 183 |
- if len(o.NodeConfigLocation) != 0 {
|
|
| 184 |
- masterDiagnosticOptions := &NodeDiagnosticsOptions{
|
|
| 185 |
- RequestedDiagnostics: intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableNodeDiagnostics).List(), |
|
| 186 |
- NodeConfigLocation: o.NodeConfigLocation, |
|
| 187 |
- LogOptions: o.LogOptions, |
|
| 188 |
- Logger: o.Logger, |
|
| 189 |
+ if hostDiags, ok, err := o.buildHostDiagnostics(); ok {
|
|
| 190 |
+ diagnostics["host"] = hostDiags |
|
| 191 |
+ } else if err != nil {
|
|
| 192 |
+ failed = true |
|
| 193 |
+ errors = append(errors, err) |
|
| 189 | 194 |
} |
| 195 |
+ }() |
|
| 190 | 196 |
|
| 191 |
- return masterDiagnosticOptions.RunDiagnostics() |
|
| 197 |
+ if failed {
|
|
| 198 |
+ return failed, kutilerrors.NewAggregate(errors), len(warnings), len(errors) |
|
| 192 | 199 |
} |
| 193 | 200 |
|
| 194 |
- return false, nil |
|
| 201 |
+ failed, err, numWarnings, numErrors := o.Run(diagnostics) |
|
| 202 |
+ numWarnings += len(warnings) |
|
| 203 |
+ numErrors += len(errors) |
|
| 204 |
+ return failed, err, numWarnings, numErrors |
|
| 195 | 205 |
} |
| 196 | 206 |
|
| 197 |
-func (o OverallDiagnosticsOptions) CheckMaster() (bool, error) {
|
|
| 198 |
- if len(o.MasterConfigLocation) == 0 {
|
|
| 199 |
- if _, err := os.Stat(StandardMasterConfigPath); !os.IsNotExist(err) {
|
|
| 200 |
- o.MasterConfigLocation = StandardMasterConfigPath |
|
| 207 |
+func (o DiagnosticsOptions) Run(diagnostics map[string][]types.Diagnostic) (bool, error, int, int) {
|
|
| 208 |
+ warnCount := 0 |
|
| 209 |
+ errorCount := 0 |
|
| 210 |
+ for area, areaDiagnostics := range diagnostics {
|
|
| 211 |
+ for _, diagnostic := range areaDiagnostics {
|
|
| 212 |
+ func() { // wrap diagnostic panic nicely in case of developer error
|
|
| 213 |
+ defer func() {
|
|
| 214 |
+ if r := recover(); r != nil {
|
|
| 215 |
+ errorCount += 1 |
|
| 216 |
+ o.Logger.Errort("diagPanic",
|
|
| 217 |
+ "While running the {{.area}}.{{.name}} diagnostic, a panic was encountered.\nThis is a bug in diagnostics. Stack trace follows : \n{{.error}}",
|
|
| 218 |
+ log.Hash{"area": area, "name": diagnostic.Name(), "error": fmt.Sprintf("%v", r)})
|
|
| 219 |
+ } |
|
| 220 |
+ }() |
|
| 221 |
+ |
|
| 222 |
+ if canRun, reason := diagnostic.CanRun(); !canRun {
|
|
| 223 |
+ if reason == nil {
|
|
| 224 |
+ o.Logger.Noticet("diagSkip", "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}",
|
|
| 225 |
+ log.Hash{"area": area, "name": diagnostic.Name(), "diag": diagnostic.Description()})
|
|
| 226 |
+ } else {
|
|
| 227 |
+ o.Logger.Noticet("diagSkip", "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}",
|
|
| 228 |
+ log.Hash{"area": area, "name": diagnostic.Name(), "diag": diagnostic.Description(), "reason": reason.Error()})
|
|
| 229 |
+ } |
|
| 230 |
+ return |
|
| 231 |
+ } |
|
| 232 |
+ |
|
| 233 |
+ o.Logger.Noticet("diagRun", "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}",
|
|
| 234 |
+ log.Hash{"area": area, "name": diagnostic.Name(), "diag": diagnostic.Description()})
|
|
| 235 |
+ r := diagnostic.Check() |
|
| 236 |
+ for _, entry := range r.Logs() {
|
|
| 237 |
+ o.Logger.LogEntry(entry) |
|
| 238 |
+ } |
|
| 239 |
+ warnCount += len(r.Warnings()) |
|
| 240 |
+ errorCount += len(r.Errors()) |
|
| 241 |
+ }() |
|
| 201 | 242 |
} |
| 202 |
- } |
|
| 203 |
- |
|
| 204 |
- if len(o.MasterConfigLocation) != 0 {
|
|
| 205 |
- masterDiagnosticOptions := &MasterDiagnosticsOptions{
|
|
| 206 |
- RequestedDiagnostics: intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableMasterDiagnostics).List(), |
|
| 207 |
- MasterConfigLocation: o.MasterConfigLocation, |
|
| 208 |
- LogOptions: o.LogOptions, |
|
| 209 |
- Logger: o.Logger, |
|
| 210 |
- } |
|
| 211 |
- |
|
| 212 |
- return masterDiagnosticOptions.RunDiagnostics() |
|
| 213 |
- } |
|
| 214 |
- |
|
| 215 |
- return false, nil |
|
| 216 |
-} |
|
| 217 | 243 |
|
| 218 |
-func NewOptionsCommand() *cobra.Command {
|
|
| 219 |
- cmd := &cobra.Command{
|
|
| 220 |
- Use: "options", |
|
| 221 |
- Run: func(cmd *cobra.Command, args []string) {
|
|
| 222 |
- cmd.Usage() |
|
| 223 |
- }, |
|
| 224 | 244 |
} |
| 225 |
- |
|
| 226 |
- templates.UseOptionsTemplates(cmd) |
|
| 227 |
- |
|
| 228 |
- return cmd |
|
| 245 |
+ return errorCount > 0, nil, warnCount, errorCount |
|
| 229 | 246 |
} |
| 230 | 247 |
|
| 231 | 248 |
// TODO move upstream |
| 232 | 249 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,76 @@ |
| 0 |
+package diagnostics |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
|
| 5 |
+ "github.com/openshift/origin/pkg/diagnostics/host" |
|
| 6 |
+ systemddiagnostics "github.com/openshift/origin/pkg/diagnostics/systemd" |
|
| 7 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 8 |
+ "os" |
|
| 9 |
+) |
|
| 10 |
+ |
|
| 11 |
+const ( |
|
| 12 |
+ StandardMasterConfigPath string = "/etc/openshift/master/master-config.yaml" |
|
| 13 |
+ StandardNodeConfigPath string = "/etc/openshift/node/node-config.yaml" |
|
| 14 |
+) |
|
| 15 |
+ |
|
| 16 |
+var ( |
|
| 17 |
+ AvailableHostDiagnostics = util.NewStringSet("AnalyzeLogs", "UnitStatus", "MasterConfigCheck", "NodeConfigCheck")
|
|
| 18 |
+) |
|
| 19 |
+ |
|
| 20 |
+func (o DiagnosticsOptions) buildHostDiagnostics() ([]types.Diagnostic, bool /* ok */, error) {
|
|
| 21 |
+ requestedDiagnostics := intersection(util.NewStringSet(o.RequestedDiagnostics...), AvailableHostDiagnostics).List() |
|
| 22 |
+ if len(requestedDiagnostics) == 0 { // no diagnostics to run here
|
|
| 23 |
+ return nil, true, nil // don't waste time on discovery |
|
| 24 |
+ } |
|
| 25 |
+ isHost := o.IsHost |
|
| 26 |
+ // check for standard host config paths if not given |
|
| 27 |
+ if len(o.MasterConfigLocation) == 0 {
|
|
| 28 |
+ if _, err := os.Stat(StandardMasterConfigPath); !os.IsNotExist(err) {
|
|
| 29 |
+ o.MasterConfigLocation = StandardMasterConfigPath |
|
| 30 |
+ isHost = true |
|
| 31 |
+ } |
|
| 32 |
+ } else {
|
|
| 33 |
+ isHost = true |
|
| 34 |
+ } |
|
| 35 |
+ if len(o.NodeConfigLocation) == 0 {
|
|
| 36 |
+ if _, err := os.Stat(StandardNodeConfigPath); !os.IsNotExist(err) {
|
|
| 37 |
+ o.NodeConfigLocation = StandardNodeConfigPath |
|
| 38 |
+ isHost = true |
|
| 39 |
+ } |
|
| 40 |
+ } else {
|
|
| 41 |
+ isHost = true |
|
| 42 |
+ } |
|
| 43 |
+ |
|
| 44 |
+ // If we're not looking at a host, don't try the diagnostics |
|
| 45 |
+ if !isHost {
|
|
| 46 |
+ return nil, true, nil |
|
| 47 |
+ } |
|
| 48 |
+ |
|
| 49 |
+ diagnostics := []types.Diagnostic{}
|
|
| 50 |
+ systemdUnits := systemddiagnostics.GetSystemdUnits(o.Logger) |
|
| 51 |
+ for _, diagnosticName := range requestedDiagnostics {
|
|
| 52 |
+ switch diagnosticName {
|
|
| 53 |
+ case "AnalyzeLogs": |
|
| 54 |
+ diagnostics = append(diagnostics, systemddiagnostics.AnalyzeLogs{systemdUnits})
|
|
| 55 |
+ |
|
| 56 |
+ case "UnitStatus": |
|
| 57 |
+ diagnostics = append(diagnostics, systemddiagnostics.UnitStatus{systemdUnits})
|
|
| 58 |
+ |
|
| 59 |
+ case "MasterConfigCheck": |
|
| 60 |
+ if len(o.MasterConfigLocation) > 0 {
|
|
| 61 |
+ diagnostics = append(diagnostics, host.MasterConfigCheck{o.MasterConfigLocation})
|
|
| 62 |
+ } |
|
| 63 |
+ |
|
| 64 |
+ case "NodeConfigCheck": |
|
| 65 |
+ if len(o.NodeConfigLocation) > 0 {
|
|
| 66 |
+ diagnostics = append(diagnostics, host.NodeConfigCheck{o.NodeConfigLocation})
|
|
| 67 |
+ } |
|
| 68 |
+ |
|
| 69 |
+ default: |
|
| 70 |
+ return diagnostics, false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
|
|
| 71 |
+ } |
|
| 72 |
+ } |
|
| 73 |
+ |
|
| 74 |
+ return diagnostics, true, nil |
|
| 75 |
+} |
| 0 | 76 |
deleted file mode 100644 |
| ... | ... |
@@ -1,143 +0,0 @@ |
| 1 |
-package diagnostics |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "fmt" |
|
| 5 |
- "io" |
|
| 6 |
- "os" |
|
| 7 |
- |
|
| 8 |
- "github.com/spf13/cobra" |
|
| 9 |
- |
|
| 10 |
- kcmdutil "github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util" |
|
| 11 |
- "github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
|
| 12 |
- |
|
| 13 |
- diagnosticflags "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
| 14 |
- "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 15 |
- masterdiagnostics "github.com/openshift/origin/pkg/diagnostics/master" |
|
| 16 |
- systemddiagnostics "github.com/openshift/origin/pkg/diagnostics/systemd" |
|
| 17 |
- diagnostictypes "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
| 18 |
-) |
|
| 19 |
- |
|
| 20 |
-const ( |
|
| 21 |
- MasterDiagnosticsRecommendedName = "master" |
|
| 22 |
- |
|
| 23 |
- StandardMasterConfigPath string = "/etc/openshift/master/master-config.yaml" |
|
| 24 |
-) |
|
| 25 |
- |
|
| 26 |
-var ( |
|
| 27 |
- AvailableMasterDiagnostics = util.NewStringSet("AnalyzeLogs", "UnitStatus", "MasterConfigCheck")
|
|
| 28 |
-) |
|
| 29 |
- |
|
| 30 |
-// user options for openshift-diagnostics client command |
|
| 31 |
-type MasterDiagnosticsOptions struct {
|
|
| 32 |
- RequestedDiagnostics util.StringList |
|
| 33 |
- |
|
| 34 |
- MasterConfigLocation string |
|
| 35 |
- |
|
| 36 |
- LogOptions *log.LoggerOptions |
|
| 37 |
- Logger *log.Logger |
|
| 38 |
-} |
|
| 39 |
- |
|
| 40 |
-const longMasterDescription = ` |
|
| 41 |
-OpenShift Diagnostics |
|
| 42 |
- |
|
| 43 |
-This command helps you understand and troubleshoot a running OpenShift |
|
| 44 |
-master. It is intended to be run from the same context as the master |
|
| 45 |
-(where "openshift start" or "openshift start master" is run, possibly from |
|
| 46 |
-systemd or inside a container) and with the same configuration options. |
|
| 47 |
- |
|
| 48 |
- $ %s |
|
| 49 |
-` |
|
| 50 |
- |
|
| 51 |
-func NewMasterCommand(name string, fullName string, out io.Writer) *cobra.Command {
|
|
| 52 |
- o := &MasterDiagnosticsOptions{
|
|
| 53 |
- RequestedDiagnostics: AvailableMasterDiagnostics.List(), |
|
| 54 |
- LogOptions: &log.LoggerOptions{Out: out},
|
|
| 55 |
- } |
|
| 56 |
- |
|
| 57 |
- cmd := &cobra.Command{
|
|
| 58 |
- Use: name, |
|
| 59 |
- Short: "Troubleshoot an OpenShift v3 master.", |
|
| 60 |
- Long: fmt.Sprintf(longMasterDescription, fullName), |
|
| 61 |
- Run: func(c *cobra.Command, args []string) {
|
|
| 62 |
- kcmdutil.CheckErr(o.Complete()) |
|
| 63 |
- |
|
| 64 |
- failed, err := o.RunDiagnostics() |
|
| 65 |
- o.Logger.Summary() |
|
| 66 |
- o.Logger.Finish() |
|
| 67 |
- |
|
| 68 |
- kcmdutil.CheckErr(err) |
|
| 69 |
- if failed {
|
|
| 70 |
- os.Exit(255) |
|
| 71 |
- } |
|
| 72 |
- }, |
|
| 73 |
- } |
|
| 74 |
- |
|
| 75 |
- cmd.SetOutput(out) // for output re: usage / help |
|
| 76 |
- |
|
| 77 |
- cmd.Flags().StringVar(&o.MasterConfigLocation, "master-config", "", "path to master config file") |
|
| 78 |
- diagnosticflags.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, diagnosticflags.RecommendedLoggerOptionFlags()) |
|
| 79 |
- diagnosticflags.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, diagnosticflags.NewRecommendedDiagnosticFlag()) |
|
| 80 |
- |
|
| 81 |
- return cmd |
|
| 82 |
-} |
|
| 83 |
- |
|
| 84 |
-func (o *MasterDiagnosticsOptions) Complete() error {
|
|
| 85 |
- // set the master config location if it hasn't been set and we find it in an expected location |
|
| 86 |
- if len(o.MasterConfigLocation) == 0 {
|
|
| 87 |
- if _, err := os.Stat(StandardMasterConfigPath); !os.IsNotExist(err) {
|
|
| 88 |
- o.MasterConfigLocation = StandardMasterConfigPath |
|
| 89 |
- } |
|
| 90 |
- |
|
| 91 |
- } |
|
| 92 |
- |
|
| 93 |
- var err error |
|
| 94 |
- o.Logger, err = o.LogOptions.NewLogger() |
|
| 95 |
- if err != nil {
|
|
| 96 |
- return err |
|
| 97 |
- } |
|
| 98 |
- |
|
| 99 |
- return nil |
|
| 100 |
-} |
|
| 101 |
- |
|
| 102 |
-func (o MasterDiagnosticsOptions) RunDiagnostics() (bool, error) {
|
|
| 103 |
- diagnostics := map[string]diagnostictypes.Diagnostic{}
|
|
| 104 |
- |
|
| 105 |
- // if we don't have a master config file, then there's no work to do |
|
| 106 |
- if len(o.MasterConfigLocation) == 0 {
|
|
| 107 |
- // TODO remove MasterConfigCheck from the list |
|
| 108 |
- } |
|
| 109 |
- |
|
| 110 |
- systemdUnits := systemddiagnostics.GetSystemdUnits(o.Logger) |
|
| 111 |
- |
|
| 112 |
- for _, diagnosticName := range o.RequestedDiagnostics {
|
|
| 113 |
- switch diagnosticName {
|
|
| 114 |
- case "AnalyzeLogs": |
|
| 115 |
- diagnostics[diagnosticName] = systemddiagnostics.AnalyzeLogs{systemdUnits, o.Logger}
|
|
| 116 |
- |
|
| 117 |
- case "UnitStatus": |
|
| 118 |
- diagnostics[diagnosticName] = systemddiagnostics.UnitStatus{systemdUnits, o.Logger}
|
|
| 119 |
- |
|
| 120 |
- case "MasterConfigCheck": |
|
| 121 |
- diagnostics[diagnosticName] = masterdiagnostics.MasterConfigCheck{o.MasterConfigLocation, o.Logger}
|
|
| 122 |
- |
|
| 123 |
- default: |
|
| 124 |
- return false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
|
|
| 125 |
- } |
|
| 126 |
- } |
|
| 127 |
- |
|
| 128 |
- for name, diagnostic := range diagnostics {
|
|
| 129 |
- if canRun, reason := diagnostic.CanRun(); !canRun {
|
|
| 130 |
- if reason == nil {
|
|
| 131 |
- o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "master", "name": name, "diag": diagnostic.Description()}})
|
|
| 132 |
- } else {
|
|
| 133 |
- o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}", TemplateData: map[string]string{"area": "master", "name": name, "diag": diagnostic.Description(), "reason": reason.Error()}})
|
|
| 134 |
- } |
|
| 135 |
- continue |
|
| 136 |
- } |
|
| 137 |
- |
|
| 138 |
- o.Logger.Noticem(log.Message{ID: "diagRun", Template: "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "master", "name": name, "diag": diagnostic.Description()}})
|
|
| 139 |
- diagnostic.Check() |
|
| 140 |
- } |
|
| 141 |
- |
|
| 142 |
- return o.Logger.ErrorsSeen(), nil |
|
| 143 |
-} |
| 144 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,142 +0,0 @@ |
| 1 |
-package diagnostics |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "fmt" |
|
| 5 |
- "io" |
|
| 6 |
- "os" |
|
| 7 |
- |
|
| 8 |
- "github.com/spf13/cobra" |
|
| 9 |
- |
|
| 10 |
- kcmdutil "github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util" |
|
| 11 |
- "github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
|
| 12 |
- |
|
| 13 |
- diagnosticflags "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
| 14 |
- "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 15 |
- nodediagnostics "github.com/openshift/origin/pkg/diagnostics/node" |
|
| 16 |
- systemddiagnostics "github.com/openshift/origin/pkg/diagnostics/systemd" |
|
| 17 |
- diagnostictypes "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
| 18 |
-) |
|
| 19 |
- |
|
| 20 |
-const ( |
|
| 21 |
- NodeDiagnosticsRecommendedName = "node" |
|
| 22 |
- |
|
| 23 |
- StandardNodeConfigPath string = "/etc/openshift/node/node-config.yaml" |
|
| 24 |
-) |
|
| 25 |
- |
|
| 26 |
-var ( |
|
| 27 |
- AvailableNodeDiagnostics = util.NewStringSet("AnalyzeLogs", "UnitStatus", "NodeConfigCheck")
|
|
| 28 |
-) |
|
| 29 |
- |
|
| 30 |
-// user options for openshift-diagnostics client command |
|
| 31 |
-type NodeDiagnosticsOptions struct {
|
|
| 32 |
- RequestedDiagnostics util.StringList |
|
| 33 |
- |
|
| 34 |
- NodeConfigLocation string |
|
| 35 |
- |
|
| 36 |
- LogOptions *log.LoggerOptions |
|
| 37 |
- Logger *log.Logger |
|
| 38 |
-} |
|
| 39 |
- |
|
| 40 |
-const longNodeDescription = ` |
|
| 41 |
-OpenShift Diagnostics |
|
| 42 |
- |
|
| 43 |
-This command helps you understand and troubleshoot a running OpenShift |
|
| 44 |
-node. It is intended to be run from the same context as the node |
|
| 45 |
-(where "openshift start" or "openshift start node" is run, possibly from |
|
| 46 |
-systemd or inside a container) and with the same configuration options. |
|
| 47 |
- |
|
| 48 |
- $ %s |
|
| 49 |
-` |
|
| 50 |
- |
|
| 51 |
-func NewNodeCommand(name string, fullName string, out io.Writer) *cobra.Command {
|
|
| 52 |
- o := &NodeDiagnosticsOptions{
|
|
| 53 |
- RequestedDiagnostics: AvailableNodeDiagnostics.List(), |
|
| 54 |
- LogOptions: &log.LoggerOptions{Out: out},
|
|
| 55 |
- } |
|
| 56 |
- |
|
| 57 |
- cmd := &cobra.Command{
|
|
| 58 |
- Use: name, |
|
| 59 |
- Short: "Troubleshoot an OpenShift v3 node.", |
|
| 60 |
- Long: fmt.Sprintf(longNodeDescription, fullName), |
|
| 61 |
- Run: func(c *cobra.Command, args []string) {
|
|
| 62 |
- kcmdutil.CheckErr(o.Complete()) |
|
| 63 |
- |
|
| 64 |
- failed, err := o.RunDiagnostics() |
|
| 65 |
- o.Logger.Summary() |
|
| 66 |
- o.Logger.Finish() |
|
| 67 |
- |
|
| 68 |
- kcmdutil.CheckErr(err) |
|
| 69 |
- if failed {
|
|
| 70 |
- os.Exit(255) |
|
| 71 |
- } |
|
| 72 |
- }, |
|
| 73 |
- } |
|
| 74 |
- |
|
| 75 |
- cmd.SetOutput(out) // for output re: usage / help |
|
| 76 |
- |
|
| 77 |
- cmd.Flags().StringVar(&o.NodeConfigLocation, "node-config", "", "path to node config file") |
|
| 78 |
- diagnosticflags.BindLoggerOptionFlags(cmd.Flags(), o.LogOptions, diagnosticflags.RecommendedLoggerOptionFlags()) |
|
| 79 |
- diagnosticflags.BindDiagnosticFlag(cmd.Flags(), &o.RequestedDiagnostics, diagnosticflags.NewRecommendedDiagnosticFlag()) |
|
| 80 |
- |
|
| 81 |
- return cmd |
|
| 82 |
-} |
|
| 83 |
- |
|
| 84 |
-func (o *NodeDiagnosticsOptions) Complete() error {
|
|
| 85 |
- // set the node config location if it hasn't been set and we find it in an expected location |
|
| 86 |
- if len(o.NodeConfigLocation) == 0 {
|
|
| 87 |
- if _, err := os.Stat(StandardNodeConfigPath); !os.IsNotExist(err) {
|
|
| 88 |
- o.NodeConfigLocation = StandardNodeConfigPath |
|
| 89 |
- } |
|
| 90 |
- } |
|
| 91 |
- |
|
| 92 |
- var err error |
|
| 93 |
- o.Logger, err = o.LogOptions.NewLogger() |
|
| 94 |
- if err != nil {
|
|
| 95 |
- return err |
|
| 96 |
- } |
|
| 97 |
- |
|
| 98 |
- return nil |
|
| 99 |
-} |
|
| 100 |
- |
|
| 101 |
-func (o NodeDiagnosticsOptions) RunDiagnostics() (bool, error) {
|
|
| 102 |
- diagnostics := map[string]diagnostictypes.Diagnostic{}
|
|
| 103 |
- |
|
| 104 |
- // if we don't have a node config file, then there's no work to do |
|
| 105 |
- if len(o.NodeConfigLocation) == 0 {
|
|
| 106 |
- // TODO remove NodeConfigCheck from the list |
|
| 107 |
- } |
|
| 108 |
- |
|
| 109 |
- systemdUnits := systemddiagnostics.GetSystemdUnits(o.Logger) |
|
| 110 |
- |
|
| 111 |
- for _, diagnosticName := range o.RequestedDiagnostics {
|
|
| 112 |
- switch diagnosticName {
|
|
| 113 |
- case "AnalyzeLogs": |
|
| 114 |
- diagnostics[diagnosticName] = systemddiagnostics.AnalyzeLogs{systemdUnits, o.Logger}
|
|
| 115 |
- |
|
| 116 |
- case "UnitStatus": |
|
| 117 |
- diagnostics[diagnosticName] = systemddiagnostics.UnitStatus{systemdUnits, o.Logger}
|
|
| 118 |
- |
|
| 119 |
- case "NodeConfigCheck": |
|
| 120 |
- diagnostics[diagnosticName] = nodediagnostics.NodeConfigCheck{o.NodeConfigLocation, o.Logger}
|
|
| 121 |
- |
|
| 122 |
- default: |
|
| 123 |
- return false, fmt.Errorf("unknown diagnostic: %v", diagnosticName)
|
|
| 124 |
- } |
|
| 125 |
- } |
|
| 126 |
- |
|
| 127 |
- for name, diagnostic := range diagnostics {
|
|
| 128 |
- if canRun, reason := diagnostic.CanRun(); !canRun {
|
|
| 129 |
- if reason == nil {
|
|
| 130 |
- o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "node", "name": name, "diag": diagnostic.Description()}})
|
|
| 131 |
- } else {
|
|
| 132 |
- o.Logger.Noticem(log.Message{ID: "diagSkip", Template: "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}", TemplateData: map[string]string{"area": "node", "name": name, "diag": diagnostic.Description(), "reason": reason.Error()}})
|
|
| 133 |
- } |
|
| 134 |
- continue |
|
| 135 |
- } |
|
| 136 |
- |
|
| 137 |
- o.Logger.Noticem(log.Message{ID: "diagRun", Template: "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}", TemplateData: map[string]string{"area": "node", "name": name, "diag": diagnostic.Description()}})
|
|
| 138 |
- diagnostic.Check() |
|
| 139 |
- } |
|
| 140 |
- |
|
| 141 |
- return o.Logger.ErrorsSeen(), nil |
|
| 142 |
-} |
| ... | ... |
@@ -8,27 +8,27 @@ import ( |
| 8 | 8 |
"github.com/openshift/origin/pkg/diagnostics/log" |
| 9 | 9 |
) |
| 10 | 10 |
|
| 11 |
-type RecommendedLoggerOptionsFlags struct {
|
|
| 11 |
+type LoggerOptionFlags struct {
|
|
| 12 | 12 |
Level FlagInfo |
| 13 | 13 |
Format FlagInfo |
| 14 | 14 |
} |
| 15 | 15 |
|
| 16 | 16 |
// default overrideable flag specifications to be bound to options. |
| 17 |
-func RecommendedLoggerOptionFlags() RecommendedLoggerOptionsFlags {
|
|
| 18 |
- return RecommendedLoggerOptionsFlags{
|
|
| 17 |
+func RecommendedLoggerOptionFlags() LoggerOptionFlags {
|
|
| 18 |
+ return LoggerOptionFlags{
|
|
| 19 | 19 |
Level: FlagInfo{FlagLevelName, "l", "1", "Level of diagnostic output: 4: Error, 3: Warn, 2: Notice, 1: Info, 0: Debug"},
|
| 20 | 20 |
Format: FlagInfo{FlagFormatName, "o", "text", "Output format: text|json|yaml"},
|
| 21 | 21 |
} |
| 22 | 22 |
} |
| 23 | 23 |
|
| 24 |
-func BindLoggerOptionFlags(cmdFlags *pflag.FlagSet, loggerOptions *log.LoggerOptions, flags RecommendedLoggerOptionsFlags) {
|
|
| 24 |
+func BindLoggerOptionFlags(cmdFlags *pflag.FlagSet, loggerOptions *log.LoggerOptions, flags LoggerOptionFlags) {
|
|
| 25 | 25 |
flags.Level.BindIntFlag(cmdFlags, &loggerOptions.Level) |
| 26 | 26 |
flags.Format.BindStringFlag(cmdFlags, &loggerOptions.Format) |
| 27 | 27 |
} |
| 28 | 28 |
|
| 29 | 29 |
// default overrideable flag specifications to be bound to options. |
| 30 | 30 |
func NewRecommendedDiagnosticFlag() FlagInfo {
|
| 31 |
- return FlagInfo{FlagDiagnosticsName, "d", "", `comma-separated list of diagnostic names to run, e.g. "systemd.AnalyzeLogs"`}
|
|
| 31 |
+ return FlagInfo{FlagDiagnosticsName, "d", "", `comma-separated list of diagnostic names to run, e.g. "AnalyzeLogs"`}
|
|
| 32 | 32 |
} |
| 33 | 33 |
|
| 34 | 34 |
func BindDiagnosticFlag(cmdFlags *pflag.FlagSet, diagnostics *util.StringList, flagInfo FlagInfo) {
|
| ... | ... |
@@ -14,10 +14,11 @@ type FlagInfo kclientcmd.FlagInfo // reuse to add methods |
| 14 | 14 |
// with tweaked definitions in different contexts if necessary. |
| 15 | 15 |
|
| 16 | 16 |
func (i FlagInfo) BindStringFlag(flags *pflag.FlagSet, target *string) {
|
| 17 |
- // assume flags with no longname are not desired |
|
| 18 |
- if len(i.LongName) > 0 {
|
|
| 19 |
- flags.StringVarP(target, i.LongName, i.ShortName, i.Default, i.Description) |
|
| 20 |
- } |
|
| 17 |
+ kclientcmd.FlagInfo(i).BindStringFlag(flags, target) |
|
| 18 |
+} |
|
| 19 |
+ |
|
| 20 |
+func (i FlagInfo) BindBoolFlag(flags *pflag.FlagSet, target *bool) {
|
|
| 21 |
+ kclientcmd.FlagInfo(i).BindBoolFlag(flags, target) |
|
| 21 | 22 |
} |
| 22 | 23 |
|
| 23 | 24 |
func (i FlagInfo) BindIntFlag(flags *pflag.FlagSet, target *int) {
|
| ... | ... |
@@ -29,15 +30,6 @@ func (i FlagInfo) BindIntFlag(flags *pflag.FlagSet, target *int) {
|
| 29 | 29 |
} |
| 30 | 30 |
} |
| 31 | 31 |
|
| 32 |
-func (i FlagInfo) BindBoolFlag(flags *pflag.FlagSet, target *bool) {
|
|
| 33 |
- // assume flags with no longname are not desired |
|
| 34 |
- if len(i.LongName) > 0 {
|
|
| 35 |
- // try to parse Default as a bool. If it fails, assume false |
|
| 36 |
- boolVal, _ := strconv.ParseBool(i.Default) |
|
| 37 |
- flags.BoolVarP(target, i.LongName, i.ShortName, boolVal, i.Description) |
|
| 38 |
- } |
|
| 39 |
-} |
|
| 40 |
- |
|
| 41 | 32 |
func (i FlagInfo) BindListFlag(flags *pflag.FlagSet, target *kutil.StringList) {
|
| 42 | 33 |
// assume flags with no longname are not desired |
| 43 | 34 |
if len(i.LongName) > 0 {
|
| ... | ... |
@@ -46,12 +38,11 @@ func (i FlagInfo) BindListFlag(flags *pflag.FlagSet, target *kutil.StringList) {
|
| 46 | 46 |
} |
| 47 | 47 |
|
| 48 | 48 |
const ( |
| 49 |
- FlagAllClientConfigName = "client-config" |
|
| 50 |
- FlagAllMasterConfigName = "master-config" |
|
| 51 |
- FlagAllNodeConfigName = "node-config" |
|
| 52 |
- FlagDiagnosticsName = "diagnostics" |
|
| 53 |
- FlagLevelName = "diaglevel" |
|
| 54 |
- FlagFormatName = "output" |
|
| 55 |
- FlagMasterConfigName = "config" |
|
| 56 |
- FlagNodeConfigName = "config" |
|
| 49 |
+ FlagMasterConfigName = "master-config" |
|
| 50 |
+ FlagNodeConfigName = "node-config" |
|
| 51 |
+ FlagClusterContextName = "cluster-context" |
|
| 52 |
+ FlagDiagnosticsName = "diagnostics" |
|
| 53 |
+ FlagLevelName = "diaglevel" |
|
| 54 |
+ FlagFormatName = "output" |
|
| 55 |
+ FlagIsHostName = "host" |
|
| 57 | 56 |
) |
| ... | ... |
@@ -2,7 +2,7 @@ OpenShift v3 Diagnostics |
| 2 | 2 |
======================== |
| 3 | 3 |
|
| 4 | 4 |
This is a tool to help administrators and users resolve common problems |
| 5 |
-that occur with OpenShift v3 deployments. It is currently (May 2015) |
|
| 5 |
+that occur with OpenShift v3 deployments. It will likely remain |
|
| 6 | 6 |
under continuous development as the OpenShift Origin project progresses. |
| 7 | 7 |
|
| 8 | 8 |
The goals of the diagnostics tool are summarized in this [Trello |
| ... | ... |
@@ -22,32 +22,21 @@ added to the `openshift` binary itself so that wherever there is an |
| 22 | 22 |
OpenShift server or client, the diagnostics can run in the exact same |
| 23 | 23 |
environment. |
| 24 | 24 |
|
| 25 |
-`openshift ex diagnostics` subcommands for master, node, and client |
|
| 26 |
-provide flags to mimic the configurations for those respective components, |
|
| 27 |
-so that running diagnostics against a component should be as simple as |
|
| 28 |
-supplying the same flags that would invoke the component. So, |
|
| 29 |
-for example, if a master is started with: |
|
| 30 |
- |
|
| 31 |
- openshift start master --public-hostname=... |
|
| 32 |
- |
|
| 33 |
-Then diagnostics against that master would simply be run as: |
|
| 34 |
- |
|
| 35 |
- openshift ex diagnostics master --public-hostname=... |
|
| 36 |
- |
|
| 37 |
-In this way it should be possible to invoke diagnostics against any |
|
| 38 |
-given environment. |
|
| 25 |
+The diagnostics tool looks for config files in standard locations. If |
|
| 26 |
+they are not found, the related diagnostics are simply skipped. |
|
| 27 |
+Non-standard locations can be specified with flags. |
|
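For example, using the master/node config flags defined in this change (the flag names come from the const block in this PR; the paths shown are illustrative, not required):

    openshift ex diagnostics --master-config=/etc/openshift/master/master-config.yaml \
                             --node-config=/etc/openshift/node/node-config.yaml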
| 39 | 28 |
|
| 40 | 29 |
Host environment |
| 41 | 30 |
================ |
| 42 | 31 |
|
| 43 |
-However, master/node diagnostics will be most useful in a specific |
|
| 44 |
-target environment, which is a deployment using Enterprise RPMs and |
|
| 45 |
-ansible deployment logic. This provides two major benefits: |
|
| 32 |
+Master/node diagnostics will be most useful in a specific target |
|
| 33 |
+environment, which is a deployment using RPMs and ansible deployment |
|
| 34 |
+logic. This provides two major benefits: |
|
| 46 | 35 |
|
| 47 | 36 |
* master/node configuration is based on a configuration file in a standard location |
| 48 | 37 |
* all components log to journald |
| 49 | 38 |
|
| 50 |
-Having configuration file in standard locations means you will generally |
|
| 39 |
+Having configuration files in standard locations means you will generally |
|
| 51 | 40 |
not even need to specify where to find them. Running: |
| 52 | 41 |
|
| 53 | 42 |
openshift ex diagnostics |
| ... | ... |
@@ -71,14 +60,54 @@ Client environment |
| 71 | 71 |
================== |
| 72 | 72 |
|
| 73 | 73 |
The user may only have access as an ordinary user, as a cluster-admin |
| 74 |
-user, or may have admin on a host where OpenShift master or node services |
|
| 75 |
-are operating. The diagnostics will attempt to use as much access as |
|
| 76 |
-the user has available. |
|
| 74 |
+user, and/or may be running on a host where OpenShift master or node |
|
| 75 |
+services are operating. The diagnostics will attempt to use as much |
|
| 76 |
+access as the user has available. |
|
| 77 | 77 |
|
| 78 | 78 |
A client with ordinary access should be able to diagnose its connection |
| 79 |
-to the master and look for problems in builds and deployments. |
|
| 80 |
- |
|
| 81 |
-A client with cluster-admin access should be able to diagnose the same |
|
| 82 |
-things for every project in the deployment, as well as infrastructure |
|
| 83 |
-status. |
|
| 79 |
+to the master and look for problems in builds and deployments for the |
|
| 80 |
+current context. |
|
| 81 |
+ |
|
| 82 |
+A client with cluster-admin access should be able to diagnose the |
|
| 83 |
+status of infrastructure. |
|
| 84 |
+ |
|
| 85 |
+Writing diagnostics |
|
| 86 |
+=================== |
|
| 87 |
+ |
|
| 88 |
+Developers are encouraged to add to the available diagnostics as they |
|
| 89 |
+encounter problems that are not easily communicated in the normal |
|
| 90 |
+operations of the program, for example components with misconfigured |
|
| 91 |
+connections, problems that are buried in logs, etc. The sanity you |
|
| 92 |
+save may be your own. |
|
| 93 |
+ |
|
| 94 |
+A diagnostic is an object that conforms to the Diagnostic interface |
|
| 95 |
+(see pkg/diagnostics/types/diagnostic.go). The diagnostic object should |
|
| 96 |
+be built by one of the builders in the pkg/cmd/experimental/diagnostics |
|
| 97 |
+package (based on whether it depends on client, cluster-admin, or host |
|
| 98 |
+configuration). When executed, the diagnostic logs its findings into |
|
| 99 |
+a result object. It should be assumed that diagnostics may run in parallel. |
|
| 100 |
+ |
|
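As a concrete illustration, here is a minimal sketch of a diagnostic in the style of the ones added in this change (ConfigContext, NodeDefinitions); the ExampleCheck type and its message are hypothetical:

    package cluster

    import (
        "errors"

        kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client"

        "github.com/openshift/origin/pkg/diagnostics/types"
    )

    // ExampleCheck is a hypothetical diagnostic showing the interface shape.
    type ExampleCheck struct {
        KubeClient *kclient.Client
    }

    func (d ExampleCheck) Name() string        { return "ExampleCheck" }
    func (d ExampleCheck) Description() string { return "Illustrate the Diagnostic interface" }

    // CanRun reports whether prerequisites (here, a kube client) are met.
    func (d ExampleCheck) CanRun() (bool, error) {
        if d.KubeClient == nil {
            return false, errors.New("must have kube client")
        }
        return true, nil
    }

    // Check logs findings into a result object rather than writing output
    // directly, so that diagnostics can eventually run in parallel.
    func (d ExampleCheck) Check() *types.DiagnosticResult {
        r := types.NewDiagnosticResult("ExampleCheck")
        r.Infof("exampleOK", "Nothing to report.")
        return r
    }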
| 101 |
+Diagnostics should prefer providing information over perfect accuracy, |
|
| 102 |
+as they are the first line of (self-)support for users. On the other |
|
| 103 |
+hand, judgment should be exercised to prevent sending users down useless |
|
| 104 |
+paths or flooding them with non-issues that obscure real problems. |
|
| 105 |
+ |
|
| 106 |
+* Errors should be reserved for things that are almost certainly broken |
|
| 107 |
+ or causing problems, for example a broken URL. |
|
| 108 |
+* Warnings indicate issues that may be a problem but could be valid for |
|
| 109 |
+ some configurations / situations, for example a node being disabled. |
|
| 110 |
+ |
|
| 111 |
+Enabling automation |
|
| 112 |
+=================== |
|
| 113 |
+ |
|
| 114 |
+Diagnostic messages are designed to be logged either for human consumption |
|
| 115 |
+("text" format) or for scripting/automation ("yaml" or "json" formats). So
|
|
| 116 |
+messages should (see the sketch after this list): |
|
| 117 |
+ |
|
| 118 |
+* Have an ID that is unique and unchanging, such that automated alerts |
|
| 119 |
+ could filter on specific IDs rather than rely on message text or level. |
|
| 120 |
+* Log any data that might be relevant in an automated alert as |
|
| 121 |
+ template data; for example, when a node is down, include the name of |
|
| 122 |
+ the node so that automation could decide how important it is. |
|
| 123 |
+* Not put anything in message template data that cannot be serialized. |
|
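A sketch of what that looks like in practice, modeled on the Warnt calls added in this change (the "clNodeDown" ID, message, and helper are hypothetical):

    package cluster

    import (
        "github.com/openshift/origin/pkg/diagnostics/log"
        "github.com/openshift/origin/pkg/diagnostics/types"
    )

    // nodeDownMsg is an illustrative message template; {{.node}} is filled
    // in from the template data below.
    const nodeDownMsg = `Node {{.node}} appears to be down.`

    func reportNodeDown(r *types.DiagnosticResult, nodeName string) {
        // "clNodeDown" is a hypothetical but stable, unique ID that automated
        // alerts can filter on, independent of message text or level.
        // The node name travels as serializable template data (log.Hash), so
        // "yaml"/"json" consumers read it as a field rather than parsing text.
        r.Warnt("clNodeDown", nil, nodeDownMsg, log.Hash{"node": nodeName})
    }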
| 84 | 124 |
|
| ... | ... |
@@ -3,14 +3,25 @@ package client |
| 3 | 3 |
import ( |
| 4 | 4 |
"errors" |
| 5 | 5 |
"fmt" |
| 6 |
+ "regexp" |
|
| 7 |
+ "strings" |
|
| 6 | 8 |
|
| 7 | 9 |
kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
| 10 |
+ kclientcmd "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd" |
|
| 8 | 11 |
kclientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
| 12 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" |
|
| 13 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" |
|
| 9 | 14 |
|
| 15 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
| 10 | 16 |
"github.com/openshift/origin/pkg/diagnostics/log" |
| 11 |
- "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
| 17 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 12 | 18 |
) |
| 13 | 19 |
|
| 20 |
+type ConfigContext struct {
|
|
| 21 |
+ RawConfig *kclientcmdapi.Config |
|
| 22 |
+ ContextName string |
|
| 23 |
+} |
|
| 24 |
+ |
|
| 14 | 25 |
const ( |
| 15 | 26 |
currentContextMissing = `Your client config specifies a current context of '{{.context}}'
|
| 16 | 27 |
which is not defined; it is likely that a mistake was introduced while |
| ... | ... |
@@ -22,20 +33,145 @@ useful to use this as a base if available.` |
| 22 | 22 |
currentContextSummary = `The current context from client config is '{{.context}}'
|
| 23 | 23 |
This will be used by default to contact your OpenShift server. |
| 24 | 24 |
` |
| 25 |
+ contextDesc = ` |
|
| 26 |
+For client config context '{{.context}}':
|
|
| 27 |
+The server URL is '{{.server}}'
|
|
| 28 |
+The user authentication is '{{.user}}'
|
|
| 29 |
+The current project is '{{.project}}'
|
|
| 30 |
+` |
|
| 31 |
+ currContextDesc = ` |
|
| 32 |
+The current client config context is '{{.context}}':
|
|
| 33 |
+The server URL is '{{.server}}'
|
|
| 34 |
+The user authentication is '{{.user}}'
|
|
| 35 |
+The current project is '{{.project}}'
|
|
| 36 |
+` |
|
| 37 |
+ clientNoResolve = ` |
|
| 38 |
+This usually means that the hostname does not resolve to an IP. |
|
| 39 |
+Hostnames should usually be resolved via DNS or an /etc/hosts file. |
|
| 40 |
+Ensure that the hostname resolves correctly from your host before proceeding. |
|
| 41 |
+Of course, your config could also simply have the wrong hostname specified. |
|
| 42 |
+` |
|
| 43 |
+ clientUnknownCa = ` |
|
| 44 |
+This means that we cannot validate the certificate in use by the |
|
| 45 |
+OpenShift API server, so we cannot securely communicate with it. |
|
| 46 |
+Connections could be intercepted and your credentials stolen. |
|
| 47 |
+ |
|
| 48 |
+Since the server certificate we see when connecting is not validated |
|
| 49 |
+by public certificate authorities (CAs), you probably need to specify a |
|
| 50 |
+certificate from a private CA to validate the connection. |
|
| 51 |
+ |
|
| 52 |
+Your config may be specifying the wrong CA cert, or none, or there |
|
| 53 |
+could actually be a man-in-the-middle attempting to intercept your |
|
| 54 |
+connection. If you are unconcerned about any of this, you can add the |
|
| 55 |
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification, |
|
| 56 |
+but this is risky and should not be necessary. |
|
| 57 |
+** Connections could be intercepted and your credentials stolen. ** |
|
| 58 |
+` |
|
| 59 |
+ clientUnneededCa = ` |
|
| 60 |
+This means that for client connections to the OpenShift API server, you |
|
| 61 |
+(or your kubeconfig) specified both a validating certificate authority |
|
| 62 |
+and that the client should bypass connection security validation. |
|
| 63 |
+ |
|
| 64 |
+This is not allowed because it is likely to be a mistake. |
|
| 65 |
+ |
|
| 66 |
+If you want to use --insecure-skip-tls-verify to bypass security (which |
|
| 67 |
+is usually a bad idea anyway), then you need to also clear the CA cert |
|
| 68 |
+from your command line options or kubeconfig file(s). Of course, it |
|
| 69 |
+would be far better to obtain and use a correct CA cert. |
|
| 70 |
+` |
|
| 71 |
+ clientInvCertName = ` |
|
| 72 |
+This means that the certificate in use by the OpenShift API server |
|
| 73 |
+(master) does not match the hostname by which you are addressing it: |
|
| 74 |
+ %s |
|
| 75 |
+so a secure connection is not allowed. In theory, this *could* mean that |
|
| 76 |
+someone is intercepting your connection and presenting a certificate |
|
| 77 |
+that is valid but for a different server, which is why secure validation |
|
| 78 |
+fails in this case. |
|
| 79 |
+ |
|
| 80 |
+However, the most likely explanation is that the server certificate |
|
| 81 |
+needs to be updated to include the name you are using to reach it. |
|
| 82 |
+ |
|
| 83 |
+If the OpenShift server is generating its own certificates (which |
|
| 84 |
+is default), then the --public-master flag on the OpenShift master is |
|
| 85 |
+usually the easiest way to do this. If you need something more complicated |
|
| 86 |
+(for instance, multiple public addresses for the API, or your own CA), |
|
| 87 |
+then you will need to custom-generate the server certificate with the |
|
| 88 |
+right names yourself. |
|
| 89 |
+ |
|
| 90 |
+If you are unconcerned about any of this, you can add the |
|
| 91 |
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification, |
|
| 92 |
+but this is risky and should not be necessary. |
|
| 93 |
+** Connections could be intercepted and your credentials stolen. ** |
|
| 94 |
+` |
|
| 95 |
+ clientConnRefused = ` |
|
| 96 |
+This means that when we tried to connect to the OpenShift API |
|
| 97 |
+server (master), we reached the host, but nothing accepted the port |
|
| 98 |
+connection. This could mean that the OpenShift master is stopped, or |
|
| 99 |
+that a firewall or security policy is blocking access at that port. |
|
| 100 |
+ |
|
| 101 |
+You will not be able to connect or do anything at all with OpenShift |
|
| 102 |
+until this server problem is resolved or you specify a corrected |
|
| 103 |
+server address.` |
|
| 104 |
+ |
|
| 105 |
+ clientConnTimeout = ` |
|
| 106 |
+This means that when we tried to connect to the OpenShift API server |
|
| 107 |
+(master), we could not reach the host at all. |
|
| 108 |
+* You may have specified the wrong host address. |
|
| 109 |
+* This could mean the host is completely unavailable (down). |
|
| 110 |
+* This could indicate a routing problem or a firewall that simply |
|
| 111 |
+ drops requests rather than responding by resetting the connection. |
|
| 112 |
+* It does not generally mean that DNS name resolution failed (which |
|
| 113 |
+ would be a different error), though the problem could be that DNS |
|
| 114 |
+ gave the wrong address.` |
|
| 115 |
+ clientMalformedHTTP = ` |
|
| 116 |
+This means that when we tried to connect to the OpenShift API server |
|
| 117 |
+(master) with a plain HTTP connection, the server did not speak |
|
| 118 |
+HTTP back to us. The most common explanation is that a secure server |
|
| 119 |
+is listening but you specified an http: connection instead of https:. |
|
| 120 |
+There could also be another service listening at the intended port |
|
| 121 |
+speaking some other protocol entirely. |
|
| 122 |
+ |
|
| 123 |
+You will not be able to connect or do anything at all with OpenShift |
|
| 124 |
+until this server problem is resolved or you specify a corrected |
|
| 125 |
+server address.` |
|
| 126 |
+ clientMalformedTLS = ` |
|
| 127 |
+This means that when we tried to connect to the OpenShift API server |
|
| 128 |
+(master) with a secure HTTPS connection, the server did not speak |
|
| 129 |
+HTTPS back to us. The most common explanation is that the server |
|
| 130 |
+listening at that port is not the secure server you expected - it |
|
| 131 |
+may be a non-secure HTTP server or the wrong service may be |
|
| 132 |
+listening there, or you may have specified an incorrect port. |
|
| 133 |
+ |
|
| 134 |
+You will not be able to connect or do anything at all with OpenShift |
|
| 135 |
+until this server problem is resolved or you specify a corrected |
|
| 136 |
+server address.` |
|
| 137 |
+ clientUnauthn = ` |
|
| 138 |
+This means that when we tried to make a request to the OpenShift API |
|
| 139 |
+server, your kubeconfig did not present valid credentials to |
|
| 140 |
+authenticate your client. Credentials generally consist of a client |
|
| 141 |
+key/certificate or an access token. Your kubeconfig may not have |
|
| 142 |
+presented any, or they may be invalid.` |
|
| 143 |
+ clientUnauthz = ` |
|
| 144 |
+This means that when we tried to make a request to the OpenShift API |
|
| 145 |
+server, the request required credentials that were not presented. |
|
| 146 |
+This can happen when an authentication token expires. Try logging in |
|
| 147 |
+with this user again.` |
|
| 25 | 148 |
) |
| 26 | 149 |
|
| 27 |
-type ConfigContext struct {
|
|
| 28 |
- KubeConfig *kclientcmdapi.Config |
|
| 29 |
- ContextName string |
|
| 150 |
+var ( |
|
| 151 |
+ invalidCertNameRx = regexp.MustCompile("x509: certificate is valid for (\\S+, )+not (\\S+)")
|
|
| 152 |
+) |
|
| 30 | 153 |
|
| 31 |
- Log *log.Logger |
|
| 154 |
+func (d ConfigContext) Name() string {
|
|
| 155 |
+ return fmt.Sprintf("ConfigContext[%s]", d.ContextName)
|
|
| 32 | 156 |
} |
| 33 | 157 |
|
| 34 | 158 |
func (d ConfigContext) Description() string {
|
| 35 |
- return "Test that client config contexts have no undefined references" |
|
| 159 |
+ return "Validate client config context is complete and has connectivity" |
|
| 36 | 160 |
} |
| 161 |
+ |
|
| 37 | 162 |
func (d ConfigContext) CanRun() (bool, error) {
|
| 38 |
- if d.KubeConfig == nil {
|
|
| 163 |
+ if d.RawConfig == nil {
|
|
| 39 | 164 |
// TODO make prettier? |
| 40 | 165 |
return false, errors.New("There is no client config file")
|
| 41 | 166 |
} |
| ... | ... |
@@ -46,13 +182,13 @@ func (d ConfigContext) CanRun() (bool, error) {
|
| 46 | 46 |
|
| 47 | 47 |
return true, nil |
| 48 | 48 |
} |
| 49 |
-func (d ConfigContext) Check() (bool, []log.Message, []error, []error) {
|
|
| 50 |
- if _, err := d.CanRun(); err != nil {
|
|
| 51 |
- return false, nil, nil, []error{err}
|
|
| 52 |
- } |
|
| 53 | 49 |
|
| 54 |
- isDefaultContext := d.KubeConfig.CurrentContext == d.ContextName |
|
| 50 |
+func (d ConfigContext) Check() *types.DiagnosticResult {
|
|
| 51 |
+ r := types.NewDiagnosticResult("ConfigContext")
|
|
| 55 | 52 |
|
| 53 |
+ isDefaultContext := d.RawConfig.CurrentContext == d.ContextName |
|
| 54 |
+ |
|
| 55 |
+ // prepare bad news message |
|
| 56 | 56 |
errorKey := "clientCfgError" |
| 57 | 57 |
unusableLine := fmt.Sprintf("The client config context '%s' is unusable", d.ContextName)
|
| 58 | 58 |
if isDefaultContext {
|
| ... | ... |
@@ -60,40 +196,92 @@ func (d ConfigContext) Check() (bool, []log.Message, []error, []error) {
|
| 60 | 60 |
unusableLine = fmt.Sprintf("The current client config context '%s' is unusable", d.ContextName)
|
| 61 | 61 |
} |
| 62 | 62 |
|
| 63 |
- context, exists := d.KubeConfig.Contexts[d.ContextName] |
|
| 63 |
+ // check that the context and its constituents are defined in the kubeconfig |
|
| 64 |
+ context, exists := d.RawConfig.Contexts[d.ContextName] |
|
| 64 | 65 |
if !exists {
|
| 65 |
- err := diagnostic.NewDiagnosticError(errorKey, "", fmt.Errorf(unusableLine+":\n Client config context '%s' is not defined.", d.ContextName)) |
|
| 66 |
- d.Log.Error(err.ID, err.Cause.Error()) |
|
| 67 |
- return false, nil, nil, []error{err}
|
|
| 66 |
+ r.Errorf(errorKey, nil, "%s:\n Client config context '%s' is not defined.", unusableLine, d.ContextName) |
|
| 67 |
+ return r |
|
| 68 | 68 |
} |
| 69 |
- |
|
| 70 | 69 |
clusterName := context.Cluster |
| 71 |
- cluster, exists := d.KubeConfig.Clusters[clusterName] |
|
| 70 |
+ cluster, exists := d.RawConfig.Clusters[clusterName] |
|
| 72 | 71 |
if !exists {
|
| 73 |
- |
|
| 74 |
- err := diagnostic.NewDiagnosticError(errorKey, "", fmt.Errorf(unusableLine+":\n Client config context '%s' has a cluster '%s' which is not defined.", d.ContextName, clusterName)) |
|
| 75 |
- d.Log.Error(err.ID, err.Cause.Error()) |
|
| 76 |
- return false, nil, nil, []error{err}
|
|
| 72 |
+ r.Errorf(errorKey, nil, "%s:\n Client config context '%s' has a cluster '%s' which is not defined.", unusableLine, d.ContextName, clusterName) |
|
| 73 |
+ return r |
|
| 77 | 74 |
} |
| 78 | 75 |
authName := context.AuthInfo |
| 79 |
- if _, exists := d.KubeConfig.AuthInfos[authName]; !exists {
|
|
| 80 |
- |
|
| 81 |
- err := diagnostic.NewDiagnosticError(errorKey, "", fmt.Errorf(unusableLine+":\n Client config context '%s' has a user identity '%s' which is not defined.", d.ContextName, authName)) |
|
| 82 |
- d.Log.Error(err.ID, err.Cause.Error()) |
|
| 83 |
- return false, nil, nil, []error{err}
|
|
| 76 |
+ if _, exists := d.RawConfig.AuthInfos[authName]; !exists {
|
|
| 77 |
+ r.Errorf(errorKey, nil, "%s:\n Client config context '%s' has a user identity '%s' which is not defined.", unusableLine, d.ContextName, authName) |
|
| 78 |
+ return r |
|
| 84 | 79 |
} |
| 85 | 80 |
|
| 81 |
+ // we found a fully-defined context |
|
| 86 | 82 |
project := context.Namespace |
| 87 | 83 |
if project == "" {
|
| 88 | 84 |
project = kapi.NamespaceDefault // OpenShift/k8s fills this in if missing |
| 85 |
+ } |
|
| 86 |
+ msgData := log.Hash{"context": d.ContextName, "server": cluster.Server, "user": authName, "project": project}
|
|
| 87 |
+ msgText := contextDesc |
|
| 88 |
+ if isDefaultContext {
|
|
| 89 |
+ msgText = currContextDesc |
|
| 90 |
+ } |
|
| 89 | 91 |
|
| 92 |
+ // Actually send a request to see if context has connectivity. |
|
| 93 |
+ // Note: we cannot reuse factories as they cache the clients, so build new factory for each context. |
|
| 94 |
+ osClient, _, err := osclientcmd.NewFactory(kclientcmd.NewDefaultClientConfig(*d.RawConfig, &kclientcmd.ConfigOverrides{Context: *context})).Clients()
|
|
| 95 |
+ // client creation now fails if it cannot connect to the server, so address connectivity errors below |
|
| 96 |
+ if err == nil {
|
|
| 97 |
+ if projects, projerr := osClient.Projects().List(labels.Everything(), fields.Everything()); projerr != nil {
|
|
| 98 |
+ err = projerr |
|
| 99 |
+ } else { // success!
|
|
| 100 |
+ list := []string{}
|
|
| 101 |
+ for i, project := range projects.Items {
|
|
| 102 |
+ if i > 9 {
|
|
| 103 |
+ list = append(list, "...") |
|
| 104 |
+ break |
|
| 105 |
+ } |
|
| 106 |
+ list = append(list, project.Name) |
|
| 107 |
+ } |
|
| 108 |
+ msgData["projects"] = list |
|
| 109 |
+ if len(list) == 0 {
|
|
| 110 |
+ r.Infot("CCctxSuccess", msgText+"Successfully requested project list, but it is empty, so user has no access to anything.", msgData)
|
|
| 111 |
+ } else {
|
|
| 112 |
+ r.Infot("CCctxSuccess", msgText+"Successfully requested project list; has access to project(s):\n {{.projects}}", msgData)
|
|
| 113 |
+ } |
|
| 114 |
+ return r |
|
| 115 |
+ } |
|
| 90 | 116 |
} |
| 91 | 117 |
|
| 92 |
- // TODO: actually send a request to see if can connect |
|
| 93 |
- message := log.Message{EvaluatedText: fmt.Sprintf("For client config context '%s':\n The server URL is '%s'\nThe user authentication is '%s'\nThe current project is '%s'", d.ContextName, cluster.Server, authName, project)}
|
|
| 94 |
- if isDefaultContext {
|
|
| 95 |
- message = log.Message{EvaluatedText: fmt.Sprintf("The current client config context is '%s':\n The server URL is '%s'\nThe user authentication is '%s'\nThe current project is '%s'", d.ContextName, cluster.Server, authName, project)}
|
|
| 118 |
+ // something went wrong; couldn't create client or get project list. |
|
| 119 |
+ // interpret the terse error messages with helpful info. |
|
| 120 |
+ errMsg := err.Error() |
|
| 121 |
+ msgData["errMsg"] = fmt.Sprintf("(%T) %[1]v", err)
|
|
| 122 |
+ var reason, errId string |
|
| 123 |
+ switch {
|
|
| 124 |
+ case regexp.MustCompile("dial tcp: lookup (\\S+): no such host").MatchString(errMsg):
|
|
| 125 |
+ errId, reason = "clientNoResolve", clientNoResolve |
|
| 126 |
+ case strings.Contains(errMsg, "x509: certificate signed by unknown authority"): |
|
| 127 |
+ errId, reason = "clientUnknownCa", clientUnknownCa |
|
| 128 |
+ case strings.Contains(errMsg, "specifying a root certificates file with the insecure flag is not allowed"): |
|
| 129 |
+ errId, reason = "clientUnneededCa", clientUnneededCa |
|
| 130 |
+ case invalidCertNameRx.MatchString(errMsg): |
|
| 131 |
+ match := invalidCertNameRx.FindStringSubmatch(errMsg) |
|
| 132 |
+ serverHost := match[len(match)-1] |
|
| 133 |
+ errId, reason = "clientInvCertName", fmt.Sprintf(clientInvCertName, serverHost) |
|
| 134 |
+ case regexp.MustCompile("dial tcp (\\S+): connection refused").MatchString(errMsg):
|
|
| 135 |
+ errId, reason = "clientConnRefused", clientConnRefused |
|
| 136 |
+ case regexp.MustCompile("dial tcp (\\S+): (?:connection timed out|i/o timeout|no route to host)").MatchString(errMsg):
|
|
| 137 |
+ errId, reason = "clientConnTimeout", clientConnTimeout |
|
| 138 |
+ case strings.Contains(errMsg, "malformed HTTP response"): |
|
| 139 |
+ errId, reason = "clientMalformedHTTP", clientMalformedHTTP |
|
| 140 |
+ case strings.Contains(errMsg, "tls: oversized record received with length"): |
|
| 141 |
+ errId, reason = "clientMalformedTLS", clientMalformedTLS |
|
| 142 |
+ case strings.Contains(errMsg, `403 Forbidden: Forbidden: "/osapi/v1beta1/projects?namespace=" denied by default`): |
|
| 143 |
+ errId, reason = "clientUnauthn", clientUnauthn |
|
| 144 |
+ case regexp.MustCompile("401 Unauthorized: Unauthorized$").MatchString(errMsg):
|
|
| 145 |
+ errId, reason = "clientUnauthz", clientUnauthz |
|
| 146 |
+ default: |
|
| 147 |
+ errId, reason = "clientUnknownConnErr", `Diagnostics does not have an explanation for what this means. Please report this error so one can be added.` |
|
| 96 | 148 |
} |
| 97 |
- d.Log.LogMessage(log.InfoLevel, message) |
|
| 98 |
- return true, []log.Message{message}, nil, nil
|
|
| 149 |
+ r.Errort(errId, err, msgText+"{{.errMsg}}\n"+reason, msgData)
|
|
| 150 |
+ return r |
|
| 99 | 151 |
} |
| 100 | 152 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,151 @@ |
| 0 |
+package client |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "io/ioutil" |
|
| 5 |
+ "os" |
|
| 6 |
+ |
|
| 7 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd" |
|
| 8 |
+ flag "github.com/spf13/pflag" |
|
| 9 |
+ |
|
| 10 |
+ "github.com/openshift/origin/pkg/cmd/cli/config" |
|
| 11 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 12 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 13 |
+) |
|
| 14 |
+ |
|
| 15 |
+// This diagnostic is a little special in that it is run separately as a precondition |
|
| 16 |
+// in order to determine whether we can run other dependent diagnostics |
|
| 17 |
+ |
|
| 18 |
+type ConfigLoading struct {
|
|
| 19 |
+ ConfFlagName string |
|
| 20 |
+ ClientFlags *flag.FlagSet |
|
| 21 |
+ successfulLoad bool // set if at least one file loaded |
|
| 22 |
+} |
|
| 23 |
+ |
|
| 24 |
+func (d *ConfigLoading) Name() string {
|
|
| 25 |
+ return "ConfigLoading" |
|
| 26 |
+} |
|
| 27 |
+ |
|
| 28 |
+func (d *ConfigLoading) Description() string {
|
|
| 29 |
+ return "Try to load client config file(s) and report what happens" |
|
| 30 |
+} |
|
| 31 |
+ |
|
| 32 |
+func (d *ConfigLoading) CanRun() (bool, error) {
|
|
| 33 |
+ return true, nil |
|
| 34 |
+} |
|
| 35 |
+ |
|
| 36 |
+func (d *ConfigLoading) SuccessfulLoad() bool {
|
|
| 37 |
+ return d.successfulLoad |
|
| 38 |
+} |
|
| 39 |
+ |
|
| 40 |
+func (d *ConfigLoading) Check() *types.DiagnosticResult {
|
|
| 41 |
+ r := types.NewDiagnosticResult("ConfigLoading")
|
|
| 42 |
+ confFlagValue := d.ClientFlags.Lookup(d.ConfFlagName).Value.String() |
|
| 43 |
+ |
|
| 44 |
+ var foundPath string |
|
| 45 |
+ rules := config.NewOpenShiftClientConfigLoadingRules() |
|
| 46 |
+ paths := append([]string{confFlagValue}, rules.Precedence...)
|
|
| 47 |
+ for index, path := range paths {
|
|
| 48 |
+ errmsg := "" |
|
| 49 |
+ switch index {
|
|
| 50 |
+ case 0: |
|
| 51 |
+ errmsg = fmt.Sprintf("--%s specified that client config should be at %s\n", d.ConfFlagName, path)
|
|
| 52 |
+ case len(paths) - 1: // config in ~/.kube |
|
| 53 |
+ // no error message indicated if it is not there... user didn't say it would be |
|
| 54 |
+ default: // the env var can supply multiple paths in theory; all middle indices land here |
|
| 55 |
+ if len(os.Getenv(config.OpenShiftConfigPathEnvVar)) != 0 {
|
|
| 56 |
+ errmsg = fmt.Sprintf("Env var %s specified that client config could be at %s\n", config.OpenShiftConfigPathEnvVar, path)
|
|
| 57 |
+ } |
|
| 58 |
+ } |
|
| 59 |
+ |
|
| 60 |
+ if d.canOpenConfigFile(path, errmsg, r) && foundPath == "" {
|
|
| 61 |
+ d.successfulLoad = true |
|
| 62 |
+ foundPath = path |
|
| 63 |
+ } |
|
| 64 |
+ } |
|
| 65 |
+ if foundPath != "" {
|
|
| 66 |
+ if confFlagValue != "" && confFlagValue != foundPath {
|
|
| 67 |
+ // found config but not where --config said |
|
| 68 |
+ r.Errorf("discCCnotFlag", nil, `
|
|
| 69 |
+The client configuration file was not found where the --%s flag indicated: |
|
| 70 |
+ %s |
|
| 71 |
+A config file was found at the following location: |
|
| 72 |
+ %s |
|
| 73 |
+If you wish to use this file for client configuration, you can specify it |
|
| 74 |
+with the --%[1]s flag, or just not specify the flag. |
|
| 75 |
+ `, d.ConfFlagName, confFlagValue, foundPath) |
|
| 76 |
+ } |
|
| 77 |
+ } else { // not found, check for master-generated ones to recommend
|
|
| 78 |
+ if confFlagValue != "" {
|
|
| 79 |
+ r.Errorf("discCCnotFlag", nil, "Did not find config file where --%s=%s indicated", d.ConfFlagName, confFlagValue)
|
|
| 80 |
+ } |
|
| 81 |
+ adminWarningF := ` |
|
| 82 |
+No client config file was available; however, one exists at |
|
| 83 |
+ %[2]s |
|
| 84 |
+which may have been generated automatically by the master. |
|
| 85 |
+If you want to use this config, you should copy it to the |
|
| 86 |
+standard location (%[3]s), |
|
| 87 |
+or you can set the environment variable %[1]s: |
|
| 88 |
+ export %[1]s=%[2]s |
|
| 89 |
+If not, obtain a config file and place it in the standard |
|
| 90 |
+location for use by the client and diagnostics. |
|
| 91 |
+` |
|
| 92 |
+ adminPaths := []string{
|
|
| 93 |
+ "/etc/openshift/master/admin.kubeconfig", // enterprise |
|
| 94 |
+ "/openshift.local.config/master/admin.kubeconfig", // origin systemd |
|
| 95 |
+ "./openshift.local.config/master/admin.kubeconfig", // origin binary |
|
| 96 |
+ } |
|
| 97 |
+ // look for a config in auto-generated locations when none was found in the standard places |
|
| 98 |
+ for _, path := range adminPaths {
|
|
| 99 |
+ msg := fmt.Sprintf("Looking for a possible client config at %s\n", path)
|
|
| 100 |
+ if d.canOpenConfigFile(path, msg, r) {
|
|
| 101 |
+ r.Warnf("discCCautoPath", nil, adminWarningF, config.OpenShiftConfigPathEnvVar, path, config.RecommendedHomeFile)
|
|
| 102 |
+ break |
|
| 103 |
+ } |
|
| 104 |
+ } |
|
| 105 |
+ } |
|
| 106 |
+ return r |
|
| 107 |
+} |
|
| 108 |
+ |
|
| 109 |
+// ---------------------------------------------------------- |
|
| 110 |
+// Attempt to open file at path as client config |
|
| 111 |
+// If there is a problem and errmsg is set, log an error |
|
| 112 |
+func (d ConfigLoading) canOpenConfigFile(path string, errmsg string, r *types.DiagnosticResult) bool {
|
|
| 113 |
+ var file *os.File |
|
| 114 |
+ var err error |
|
| 115 |
+ if path == "" { // empty param/envvar
|
|
| 116 |
+ return false |
|
| 117 |
+ } else if file, err = os.Open(path); err == nil {
|
|
| 118 |
+ r.Debugt("discOpenCC", "Reading client config at {{.path}}", log.Hash{"path": path})
|
|
| 119 |
+ } else if errmsg == "" {
|
|
| 120 |
+ r.Debugf("discOpenCCNo", "Could not read client config at %s:\n%#v", path, err)
|
|
| 121 |
+ } else if os.IsNotExist(err) {
|
|
| 122 |
+ r.Debug("discOpenCCNoExist", errmsg+"but that file does not exist.")
|
|
| 123 |
+ } else if os.IsPermission(err) {
|
|
| 124 |
+ r.Error("discOpenCCNoPerm", err, errmsg+"but lack permission to read that file.")
|
|
| 125 |
+ } else {
|
|
| 126 |
+ r.Errorf("discOpenCCErr", err, "%sbut there was an error opening it:\n%#v", errmsg, err)
|
|
| 127 |
+ } |
|
| 128 |
+ if file != nil { // it is open for reading
|
|
| 129 |
+ defer file.Close() |
|
| 130 |
+ if buffer, err := ioutil.ReadAll(file); err != nil {
|
|
| 131 |
+ r.Errorf("discCCReadErr", err, "Unexpected error while reading client config file (%s): %v", path, err)
|
|
| 132 |
+ } else if _, err := clientcmd.Load(buffer); err != nil {
|
|
| 133 |
+ r.Errorf("discCCYamlErr", err, `
|
|
| 134 |
+Error reading YAML from client config file (%s): |
|
| 135 |
+ %v |
|
| 136 |
+This file may have been truncated or mis-edited. |
|
| 137 |
+Please fix, remove, or obtain a new client config`, file.Name(), err) |
|
| 138 |
+ } else {
|
|
| 139 |
+ r.Infof("discCCRead", "Successfully read a client config file at '%s'", path)
|
|
| 140 |
+ /* Note, we're not going to use this config file directly. |
|
| 141 |
+ * Instead, we'll defer to the openshift client code to assimilate |
|
| 142 |
+ * flags, env vars, and the potential hierarchy of config files |
|
| 143 |
+ * into an actual configuration that the client uses. |
|
| 144 |
+ * However, for diagnostic purposes, record the files we find. |
|
| 145 |
+ */ |
|
| 146 |
+ return true |
|
| 147 |
+ } |
|
| 148 |
+ } |
|
| 149 |
+ return false |
|
| 150 |
+} |
| 0 | 151 |
deleted file mode 100644 |
| ... | ... |
@@ -1,100 +0,0 @@ |
| 1 |
-package client |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "errors" |
|
| 5 |
- "fmt" |
|
| 6 |
- |
|
| 7 |
- kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
|
| 8 |
- kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client" |
|
| 9 |
- |
|
| 10 |
- "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" |
|
| 11 |
- "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" |
|
| 12 |
- "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 13 |
- "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
| 14 |
-) |
|
| 15 |
- |
|
| 16 |
-const ( |
|
| 17 |
- clientErrorGettingNodes = `Client error while retrieving node records. Client retrieved records |
|
| 18 |
-during discovery, so this is likely to be a transient error. Try running |
|
| 19 |
-diagnostics again. If this message persists, there may be a permissions |
|
| 20 |
-problem with getting node records. The error was: |
|
| 21 |
- |
|
| 22 |
-(%T) %[1]v` |
|
| 23 |
- |
|
| 24 |
- nodeNotReady = `Node {{.node}} is defined but is not marked as ready.
|
|
| 25 |
-Ready status is {{.status}} because "{{.reason}}"
|
|
| 26 |
-If the node is not intentionally disabled, check that the master can |
|
| 27 |
-reach the node hostname for a health check and the node is checking in |
|
| 28 |
-to the master with the same hostname. |
|
| 29 |
- |
|
| 30 |
-While in this state, pods should not be scheduled to deploy on the node, |
|
| 31 |
-and any existing scheduled pods will be considered failed and removed. |
|
| 32 |
-` |
|
| 33 |
-) |
|
| 34 |
- |
|
| 35 |
-// NodeDefinitions |
|
| 36 |
-type NodeDefinition struct {
|
|
| 37 |
- KubeClient *kclient.Client |
|
| 38 |
- |
|
| 39 |
- Log *log.Logger |
|
| 40 |
-} |
|
| 41 |
- |
|
| 42 |
-func (d NodeDefinition) Description() string {
|
|
| 43 |
- return "Check node records on master" |
|
| 44 |
-} |
|
| 45 |
-func (d NodeDefinition) CanRun() (bool, error) {
|
|
| 46 |
- if d.KubeClient == nil {
|
|
| 47 |
- // TODO make prettier? |
|
| 48 |
- return false, errors.New("must have kube client")
|
|
| 49 |
- } |
|
| 50 |
- if _, err := d.KubeClient.Nodes().List(labels.LabelSelector{}, fields.Everything()); err != nil {
|
|
| 51 |
- // TODO check for 403 to return: "Client does not have cluster-admin access and cannot see node records" |
|
| 52 |
- |
|
| 53 |
- return false, diagnostic.NewDiagnosticError("clGetNodesFailed", fmt.Sprintf(clientErrorGettingNodes, err), err)
|
|
| 54 |
- } |
|
| 55 |
- |
|
| 56 |
- return true, nil |
|
| 57 |
-} |
|
| 58 |
-func (d NodeDefinition) Check() (bool, []log.Message, []error, []error) {
|
|
| 59 |
- if _, err := d.CanRun(); err != nil {
|
|
| 60 |
- return false, nil, nil, []error{err}
|
|
| 61 |
- } |
|
| 62 |
- |
|
| 63 |
- nodes, err := d.KubeClient.Nodes().List(labels.LabelSelector{}, fields.Everything())
|
|
| 64 |
- if err != nil {
|
|
| 65 |
- return false, nil, nil, []error{
|
|
| 66 |
- diagnostic.NewDiagnosticError("clGetNodesFailed", fmt.Sprintf(clientErrorGettingNodes, err), err),
|
|
| 67 |
- } |
|
| 68 |
- } |
|
| 69 |
- |
|
| 70 |
- for _, node := range nodes.Items {
|
|
| 71 |
- var ready *kapi.NodeCondition |
|
| 72 |
- for i, condition := range node.Status.Conditions {
|
|
| 73 |
- switch condition.Type {
|
|
| 74 |
- // currently only one... used to be more, may be again |
|
| 75 |
- case kapi.NodeReady: |
|
| 76 |
- ready = &node.Status.Conditions[i] |
|
| 77 |
- // TODO comment needed to explain why we do last one wins. should this break instead? |
|
| 78 |
- } |
|
| 79 |
- } |
|
| 80 |
- |
|
| 81 |
- if ready == nil || ready.Status != kapi.ConditionTrue {
|
|
| 82 |
- // instead of building this, simply use the node object directly |
|
| 83 |
- templateData := map[string]interface{}{}
|
|
| 84 |
- templateData["node"] = node.Name |
|
| 85 |
- if ready == nil {
|
|
| 86 |
- templateData["status"] = "None" |
|
| 87 |
- templateData["reason"] = "There is no readiness record." |
|
| 88 |
- } else {
|
|
| 89 |
- templateData["status"] = ready.Status |
|
| 90 |
- templateData["reason"] = ready.Reason |
|
| 91 |
- } |
|
| 92 |
- |
|
| 93 |
- return false, nil, []error{
|
|
| 94 |
- diagnostic.NewDiagnosticErrorFromTemplate("clNodeBroken", nodeNotReady, templateData),
|
|
| 95 |
- }, nil |
|
| 96 |
- } |
|
| 97 |
- } |
|
| 98 |
- |
|
| 99 |
- return true, nil, nil, nil |
|
| 100 |
-} |
| 101 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,116 @@ |
| 0 |
+package cluster |
|
| 1 |
+ |
|
| 2 |
+// The purpose of this diagnostic is to detect nodes that are out of commission |
|
| 3 |
+// (which may affect the ability to schedule pods) for user awareness. |
|
| 4 |
+ |
|
| 5 |
+import ( |
|
| 6 |
+ "errors" |
|
| 7 |
+ "fmt" |
|
| 8 |
+ |
|
| 9 |
+ kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
|
| 10 |
+ kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client" |
|
| 11 |
+ |
|
| 12 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" |
|
| 13 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" |
|
| 14 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 15 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 16 |
+) |
|
| 17 |
+ |
|
| 18 |
+const ( |
|
| 19 |
+ clientErrorGettingNodes = `Client error while retrieving node records. Client retrieved records |
|
| 20 |
+during discovery, so this is likely to be a transient error. Try running |
|
| 21 |
+diagnostics again. If this message persists, there may be a permissions |
|
| 22 |
+problem with getting node records. The error was: |
|
| 23 |
+ |
|
| 24 |
+(%T) %[1]v` |
|
| 25 |
+ |
|
| 26 |
+ nodeNotReady = `Node {{.node}} is defined but is not marked as ready.
|
|
| 27 |
+Ready status is {{.status}} because "{{.reason}}"
|
|
| 28 |
+If the node is not intentionally disabled, check that the master can |
|
| 29 |
+reach the node hostname for a health check and the node is checking in |
|
| 30 |
+to the master with the same hostname. |
|
| 31 |
+ |
|
| 32 |
+While in this state, pods should not be scheduled to deploy on the node, |
|
| 33 |
+and any existing scheduled pods will be considered failed and removed. |
|
| 34 |
+` |
|
| 35 |
+ |
|
| 36 |
+ nodeNotSched = `Node {{.node}} is ready but is marked Unschedulable.
|
|
| 37 |
+This is usually set manually for administrative reasons. |
|
| 38 |
+An administrator can mark the node schedulable with: |
|
| 39 |
+ oadm manage-node {{.node}} --schedulable=true
|
|
| 40 |
+ |
|
| 41 |
+While in this state, pods should not be scheduled to deploy on the node. |
|
| 42 |
+Existing pods will continue to run until completed or evacuated (see |
|
| 43 |
+other options for 'oadm manage-node'). |
|
| 44 |
+` |
|
| 45 |
+) |
|
| 46 |
+ |
|
| 47 |
+// NodeDefinitions |
|
| 48 |
+type NodeDefinitions struct {
|
|
| 49 |
+ KubeClient *kclient.Client |
|
| 50 |
+} |
|
| 51 |
+ |
|
| 52 |
+func (d NodeDefinitions) Name() string {
|
|
| 53 |
+ return "NodeDefinitions" |
|
| 54 |
+} |
|
| 55 |
+ |
|
| 56 |
+func (d NodeDefinitions) Description() string {
|
|
| 57 |
+ return "Check node records on master" |
|
| 58 |
+} |
|
| 59 |
+ |
|
| 60 |
+func (d NodeDefinitions) CanRun() (bool, error) {
|
|
| 61 |
+ if d.KubeClient == nil {
|
|
| 62 |
+ return false, errors.New("must have kube client")
|
|
| 63 |
+ } |
|
| 64 |
+ if _, err := d.KubeClient.Nodes().List(labels.LabelSelector{}, fields.Everything()); err != nil {
|
|
| 65 |
+ // TODO check for 403 to return: "Client does not have cluster-admin access and cannot see node records" |
|
| 66 |
+ |
|
| 67 |
+ msg := log.Message{ID: "clGetNodesFailed", EvaluatedText: fmt.Sprintf(clientErrorGettingNodes, err)}
|
|
| 68 |
+ return false, types.DiagnosticError{msg.ID, &msg, err}
|
|
| 69 |
+ } |
|
| 70 |
+ |
|
| 71 |
+ return true, nil |
|
| 72 |
+} |
|
| 73 |
+ |
|
| 74 |
+func (d NodeDefinitions) Check() *types.DiagnosticResult {
|
|
| 75 |
+ r := types.NewDiagnosticResult("NodeDefinition")
|
|
| 76 |
+ |
|
| 77 |
+ nodes, err := d.KubeClient.Nodes().List(labels.LabelSelector{}, fields.Everything())
|
|
| 78 |
+ if err != nil {
|
|
| 79 |
+ r.Errorf("clGetNodesFailed", err, clientErrorGettingNodes, err)
|
|
| 80 |
+ return r |
|
| 81 |
+ } |
|
| 82 |
+ |
|
| 83 |
+ anyNodesAvail := false |
|
| 84 |
+ for _, node := range nodes.Items {
|
|
| 85 |
+ var ready *kapi.NodeCondition |
|
| 86 |
+ for i, condition := range node.Status.Conditions {
|
|
| 87 |
+ switch condition.Type {
|
|
| 88 |
+ // Each condition appears only once. Currently there's only one... used to be more |
|
| 89 |
+ case kapi.NodeReady: |
|
| 90 |
+ ready = &node.Status.Conditions[i] |
|
| 91 |
+ } |
|
| 92 |
+ } |
|
| 93 |
+ |
|
| 94 |
+ if ready == nil || ready.Status != kapi.ConditionTrue {
|
|
| 95 |
+ templateData := log.Hash{"node": node.Name}
|
|
| 96 |
+ if ready == nil {
|
|
| 97 |
+ templateData["status"] = "None" |
|
| 98 |
+ templateData["reason"] = "There is no readiness record." |
|
| 99 |
+ } else {
|
|
| 100 |
+ templateData["status"] = ready.Status |
|
| 101 |
+ templateData["reason"] = ready.Reason |
|
| 102 |
+ } |
|
| 103 |
+ r.Warnt("clNodeNotReady", nil, nodeNotReady, templateData)
|
|
| 104 |
+ } else if node.Spec.Unschedulable {
|
|
| 105 |
+ r.Warnt("clNodeNotSched", nil, nodeNotSched, log.Hash{"node": node.Name})
|
|
| 106 |
+ } else {
|
|
| 107 |
+ anyNodesAvail = true |
|
| 108 |
+ } |
|
| 109 |
+ } |
|
| 110 |
+ if !anyNodesAvail {
|
|
| 111 |
+ r.Error("clNoAvailNodes", nil, "There were no nodes available for OpenShift to use.")
|
|
| 112 |
+ } |
|
| 113 |
+ |
|
| 114 |
+ return r |
|
| 115 |
+} |
| 0 | 116 |
deleted file mode 100644 |
| ... | ... |
@@ -1,104 +0,0 @@ |
| 1 |
-package discovery // client |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "fmt" |
|
| 5 |
- "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 6 |
- "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 7 |
- "os" |
|
| 8 |
- "os/exec" |
|
| 9 |
- "path/filepath" |
|
| 10 |
- "runtime" |
|
| 11 |
- "strings" |
|
| 12 |
-) |
|
| 13 |
- |
|
| 14 |
-// ---------------------------------------------------------- |
|
| 15 |
-// Look for 'osc' and 'openshift' executables |
|
| 16 |
-func (env *Environment) DiscoverClient() error {
|
|
| 17 |
- var err error |
|
| 18 |
- f := env.Options.ClientDiagOptions.Factory |
|
| 19 |
- if config, err := f.OpenShiftClientConfig.RawConfig(); err != nil {
|
|
| 20 |
- env.Log.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err)
|
|
| 21 |
- } else {
|
|
| 22 |
- env.OsConfig = &config |
|
| 23 |
- env.FactoryForContext[config.CurrentContext] = f |
|
| 24 |
- } |
|
| 25 |
- env.Log.Debug("discSearchExec", "Searching for executables in path:\n "+strings.Join(filepath.SplitList(os.Getenv("PATH")), "\n ")) //TODO for non-Linux OS
|
|
| 26 |
- env.OscPath = env.findExecAndLog("osc")
|
|
| 27 |
- if env.OscPath != "" {
|
|
| 28 |
- env.OscVersion, err = getExecVersion(env.OscPath, env.Log) |
|
| 29 |
- } |
|
| 30 |
- env.OpenshiftPath = env.findExecAndLog("openshift")
|
|
| 31 |
- if env.OpenshiftPath != "" {
|
|
| 32 |
- env.OpenshiftVersion, err = getExecVersion(env.OpenshiftPath, env.Log) |
|
| 33 |
- } |
|
| 34 |
- if env.OpenshiftVersion.NonZero() && env.OscVersion.NonZero() && !env.OpenshiftVersion.Eq(env.OscVersion) {
|
|
| 35 |
- env.Log.Warnm("discVersionMM", log.Msg{"osV": env.OpenshiftVersion.GoString(), "oscV": env.OscVersion.GoString(),
|
|
| 36 |
- "text": fmt.Sprintf("'openshift' version %#v does not match 'osc' version %#v; update or remove the lower version", env.OpenshiftVersion, env.OscVersion)})
|
|
| 37 |
- } |
|
| 38 |
- return err |
|
| 39 |
-} |
|
| 40 |
- |
|
| 41 |
-// ---------------------------------------------------------- |
|
| 42 |
-// Look for a specific executable and log what happens |
|
| 43 |
-func (env *Environment) findExecAndLog(cmd string) string {
|
|
| 44 |
- if path := findExecFor(cmd); path != "" {
|
|
| 45 |
- env.Log.Infom("discExecFound", log.Msg{"command": cmd, "path": path, "tmpl": "Found '{{.command}}' at {{.path}}"})
|
|
| 46 |
- return path |
|
| 47 |
- } else {
|
|
| 48 |
- env.Log.Warnm("discExecNoPath", log.Msg{"command": cmd, "tmpl": "No '{{.command}}' executable was found in your path"})
|
|
| 49 |
- } |
|
| 50 |
- return "" |
|
| 51 |
-} |
|
| 52 |
- |
|
| 53 |
-// ---------------------------------------------------------- |
|
| 54 |
-// Look in the path for an executable |
|
| 55 |
-func findExecFor(cmd string) string {
|
|
| 56 |
- path, err := exec.LookPath(cmd) |
|
| 57 |
- if err == nil {
|
|
| 58 |
- return path |
|
| 59 |
- } |
|
| 60 |
- if runtime.GOOS == "windows" {
|
|
| 61 |
- path, err = exec.LookPath(cmd + ".exe") |
|
| 62 |
- if err == nil {
|
|
| 63 |
- return path |
|
| 64 |
- } |
|
| 65 |
- } |
|
| 66 |
- return "" |
|
| 67 |
-} |
|
| 68 |
- |
|
| 69 |
-// ---------------------------------------------------------- |
|
| 70 |
-// Invoke executable's "version" command to determine version |
|
| 71 |
-func getExecVersion(path string, logger *log.Logger) (version types.Version, err error) {
|
|
| 72 |
- cmd := exec.Command(path, "version") |
|
| 73 |
- var out []byte |
|
| 74 |
- out, err = cmd.CombinedOutput() |
|
| 75 |
- if err == nil {
|
|
| 76 |
- var name string |
|
| 77 |
- var x, y, z int |
|
| 78 |
- if scanned, err := fmt.Sscanf(string(out), "%s v%d.%d.%d", &name, &x, &y, &z); scanned > 1 {
|
|
| 79 |
- version = types.Version{x, y, z}
|
|
| 80 |
- logger.Infom("discVersion", log.Msg{"tmpl": "version of {{.command}} is {{.version}}", "command": name, "version": version.GoString()})
|
|
| 81 |
- } else {
|
|
| 82 |
- logger.Errorf("discVersErr", `
|
|
| 83 |
-Expected version output from '%s version' |
|
| 84 |
-Could not parse output received: |
|
| 85 |
-%v |
|
| 86 |
-Error was: %#v`, path, string(out), err) |
|
| 87 |
- } |
|
| 88 |
- } else {
|
|
| 89 |
- switch err.(type) {
|
|
| 90 |
- case *exec.Error: |
|
| 91 |
- logger.Errorf("discVersErr", "error in executing '%v version': %v", path, err)
|
|
| 92 |
- case *exec.ExitError: |
|
| 93 |
- logger.Errorf("discVersErr", `
|
|
| 94 |
-Executed '%v version' which exited with an error code. |
|
| 95 |
-This version is likely old or broken. |
|
| 96 |
-Error was '%v'; |
|
| 97 |
-Output was: |
|
| 98 |
-%v`, path, err.Error(), log.LimitLines(string(out), 5)) |
|
| 99 |
- default: |
|
| 100 |
- logger.Errorf("discVersErr", "executed '%v version' but an error occurred:\n%v\nOutput was:\n%v", path, err, string(out))
|
|
| 101 |
- } |
|
| 102 |
- } |
|
| 103 |
- return version, err |
|
| 104 |
-} |
| 105 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,378 +0,0 @@ |
| 1 |
-package discovery // config |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "fmt" |
|
| 5 |
- kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
|
| 6 |
- "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd" |
|
| 7 |
- clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
| 8 |
- "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" |
|
| 9 |
- "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" |
|
| 10 |
- "github.com/openshift/origin/pkg/cmd/cli/config" |
|
| 11 |
- "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
| 12 |
- osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
| 13 |
- "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 14 |
- "io/ioutil" |
|
| 15 |
- "os" |
|
| 16 |
- "regexp" |
|
| 17 |
- "strings" |
|
| 18 |
-) |
|
| 19 |
- |
|
| 20 |
-/* ---------------------------------------------------------- |
|
| 21 |
-Look for the client config and try to read it. |
|
| 22 |
- |
|
| 23 |
-We will look in the standard locations, alert the user to what we find |
|
| 24 |
-as we go along, and try to be helpful. |
|
| 25 |
-*/ |
|
| 26 |
- |
|
| 27 |
-// ------------------------------------------------------------- |
|
| 28 |
-// Look for client config file in a number of possible locations |
|
| 29 |
-func (env *Environment) ReadClientConfigFiles() {
|
|
| 30 |
- confFlagName := options.FlagAllClientConfigName |
|
| 31 |
- confFlag := env.Options.ClientConfigPath // from openshift-diagnostics --client-config |
|
| 32 |
- if flags := env.Options.GlobalFlags; flags != nil {
|
|
| 33 |
- name := config.OpenShiftConfigFlagName |
|
| 34 |
- if flag := env.Options.GlobalFlags.Lookup(name); flag != nil {
|
|
| 35 |
- confFlag = flag.Value.String() // from openshift-diagnostics client --config |
|
| 36 |
- confFlagName = name |
|
| 37 |
- } |
|
| 38 |
- } |
|
| 39 |
- var found bool |
|
| 40 |
- rules := config.NewOpenShiftClientConfigLoadingRules() |
|
| 41 |
- paths := append([]string{confFlag}, rules.Precedence...)
|
|
| 42 |
- for index, path := range paths {
|
|
| 43 |
- errmsg := "" |
|
| 44 |
- switch index {
|
|
| 45 |
- case 0: |
|
| 46 |
- errmsg = fmt.Sprintf("--"+confFlagName+" specified that client config should be at %s\n", path)
|
|
| 47 |
- case len(paths) - 1: |
|
| 48 |
- // do nothing, the config wasn't found in ~ |
|
| 49 |
- default: |
|
| 50 |
- if len(os.Getenv(config.OpenShiftConfigPathEnvVar)) != 0 {
|
|
| 51 |
- errmsg = fmt.Sprintf("$OPENSHIFTCONFIG specified that client config should be at %s\n", path)
|
|
| 52 |
- } |
|
| 53 |
- } |
|
| 54 |
- |
|
| 55 |
- if rawConfig := openConfigFile(path, errmsg, env.Log); rawConfig != nil && !found {
|
|
| 56 |
- found = true |
|
| 57 |
- env.ClientConfigPath = path |
|
| 58 |
- env.ClientConfigRaw = rawConfig |
|
| 59 |
- } |
|
| 60 |
- } |
|
| 61 |
- if found {
|
|
| 62 |
- if confFlag != "" && confFlag != env.ClientConfigPath {
|
|
| 63 |
- // found config but not where --config said, so don't continue discovery |
|
| 64 |
- env.Log.Errorf("discCCnotFlag", `
|
|
| 65 |
-The client configuration file was not found where the --%s flag indicated: |
|
| 66 |
- %s |
|
| 67 |
-A config file was found at the following location: |
|
| 68 |
- %s |
|
| 69 |
-If you wish to use this file for client configuration, you can specify it |
|
| 70 |
-with the --%[1]s flag, or just not specify the flag. |
|
| 71 |
- `, confFlagName, confFlag, env.ClientConfigPath) |
|
| 72 |
- } else {
|
|
| 73 |
- // happy path, client config found as expected |
|
| 74 |
- env.WillCheck[ClientTarget] = true |
|
| 75 |
- } |
|
| 76 |
- } else { // not found, decide what to do
|
|
| 77 |
- if confFlag != "" { // user expected conf file at specific place
|
|
| 78 |
- env.Log.Errorf("discNoCC", "The client configuration file was not found where --%s='%s' indicated.", confFlagName, confFlag)
|
|
| 79 |
- } else if !env.Options.ClientDiagOptions.MustCheck {
|
|
| 80 |
- env.Log.Notice("discSkipCLI", "No client config file found; client diagnostics will not be performed.")
|
|
| 81 |
- } else {
|
|
| 82 |
- // user specifically wants to troubleshoot client, but no conf file given |
|
| 83 |
- env.Log.Warn("discNoCCfile", "No client config file read; OpenShift client diagnostics will use flags and default configuration.")
|
|
| 84 |
- env.WillCheck[ClientTarget] = true |
|
| 85 |
- adminPaths := []string{
|
|
| 86 |
- "/etc/openshift/master/admin.kubeconfig", // enterprise |
|
| 87 |
- "/openshift.local.config/master/admin.kubeconfig", // origin systemd |
|
| 88 |
- "./openshift.local.config/master/admin.kubeconfig", // origin binary |
|
| 89 |
- } |
|
| 90 |
- adminWarningF := ` |
|
| 91 |
-No client config file was available; however, one exists at |
|
| 92 |
- %[1]s |
|
| 93 |
-which is a standard location where the master generates it. |
|
| 94 |
-If this is what you want to use, you should copy it to a standard location |
|
| 95 |
-(~/.config/openshift/.config, or the current directory), or you can set the |
|
| 96 |
-environment variable OPENSHIFTCONFIG in your ~/.bash_profile: |
|
| 97 |
- export OPENSHIFTCONFIG=%[1]s |
|
| 98 |
-If this is not what you want, you should obtain a config file and |
|
| 99 |
-place it in a standard location. |
|
| 100 |
-` |
|
| 101 |
- // look for it in auto-generated locations when not found properly |
|
| 102 |
- for _, path := range adminPaths {
|
|
| 103 |
- if conf := openConfigFile(path, "", env.Log); conf != nil {
|
|
| 104 |
- env.Log.Warnf("discCCautoPath", adminWarningF, path)
|
|
| 105 |
- break |
|
| 106 |
- } |
|
| 107 |
- } |
|
| 108 |
- } |
|
| 109 |
- } |
|
| 110 |
-} |
|
| 111 |
- |
|
| 112 |
-// ---------------------------------------------------------- |
|
| 113 |
-// Attempt to open file at path as client config |
|
| 114 |
-// If there is a problem and errmsg is set, log an error |
|
| 115 |
-func openConfigFile(path string, errmsg string, logger *log.Logger) *clientcmdapi.Config {
|
|
| 116 |
- var err error |
|
| 117 |
- var file *os.File |
|
| 118 |
- if path == "" { // empty param/envvar
|
|
| 119 |
- return nil |
|
| 120 |
- } else if file, err = os.Open(path); err == nil {
|
|
| 121 |
- logger.Debugm("discOpenCC", log.Msg{"tmpl": "Reading client config at {{.path}}", "path": path})
|
|
| 122 |
- } else if errmsg == "" {
|
|
| 123 |
- logger.Debugf("discOpenCCNo", "Could not read client config at %s:\n%#v", path, err)
|
|
| 124 |
- } else if os.IsNotExist(err) {
|
|
| 125 |
- logger.Error("discOpenCCNoExist", errmsg+"but that file does not exist.")
|
|
| 126 |
- } else if os.IsPermission(err) {
|
|
| 127 |
- logger.Error("discOpenCCNoPerm", errmsg+"but lack permission to read that file.")
|
|
| 128 |
- } else {
|
|
| 129 |
- logger.Errorf("discOpenCCErr", "%sbut there was an error opening it:\n%#v", errmsg, err)
|
|
| 130 |
- } |
|
| 131 |
- if file != nil { // it is open for reading
|
|
| 132 |
- defer file.Close() |
|
| 133 |
- if buffer, err := ioutil.ReadAll(file); err != nil {
|
|
| 134 |
- logger.Errorf("discCCReadErr", "Unexpected error while reading client config file (%s): %v", path, err)
|
|
| 135 |
- } else if conf, err := clientcmd.Load(buffer); err != nil {
|
|
| 136 |
- logger.Errorf("discCCYamlErr", `
|
|
| 137 |
-Error reading YAML from client config file (%s): |
|
| 138 |
- %v |
|
| 139 |
-This file may have been truncated or mis-edited. |
|
| 140 |
-Please fix, remove, or obtain a new client config`, file.Name(), err) |
|
| 141 |
- } else {
|
|
| 142 |
- logger.Infom("discCCRead", log.Msg{"tmpl": `Successfully read a client config file at '{{.path}}'`, "path": path})
|
|
| 143 |
- /* Note, we're not going to use this config file directly. |
|
| 144 |
- * Instead, we'll defer to the openshift client code to assimilate |
|
| 145 |
- * flags, env vars, and the potential hierarchy of config files |
|
| 146 |
- * into an actual configuration that the client uses. |
|
| 147 |
- * However, for diagnostic purposes, record the first we find. |
|
| 148 |
- */ |
|
| 149 |
- return conf |
|
| 150 |
- } |
|
| 151 |
- } |
|
| 152 |
- return nil |
|
| 153 |
-} |
|
| 154 |
- |
|
| 155 |
-/* The full client configuration may specify multiple contexts, each
- * of which could be a different server, a different user, a different
- * default project. We want to check which contexts have useful access,
- * and record those. At this point, we should already have the factory
- * for the current context. Factories embed config and a client cache,
- * and since we want to do discovery for every available context, we are
- * going to create a factory for each context. We will determine which
- * context actually has access to the default project, preferring the
- * current (default) context if it does. Connection errors should be
- * diagnosed along the way.
- */
-func (env *Environment) ConfigClient() {
-	if env.OsConfig != nil {
-		// TODO: run these in parallel, with a time limit so connection timeouts don't take forever
-		for cname, context := range env.OsConfig.Contexts {
-			// set context, create factory, see what's available
-			if env.FactoryForContext[cname] == nil {
-				//config := clientcmd.NewNonInteractiveClientConfig(env.Factory.OpenShiftClientConfig, cname, &clientcmd.ConfigOverrides{})
-				config := clientcmd.NewNonInteractiveClientConfig(*env.OsConfig, cname, &clientcmd.ConfigOverrides{})
-				f := osclientcmd.NewFactory(config)
-				//f.BindFlags(env.Flags.OpenshiftFlags)
-				env.FactoryForContext[cname] = f
-			}
-			if access := getContextAccess(env.FactoryForContext[cname], cname, context, env.Log); access != nil {
-				env.AccessForContext[cname] = access
-				if access.ClusterAdmin && (cname == env.OsConfig.CurrentContext || env.ClusterAdminFactory == nil) {
-					env.ClusterAdminFactory = env.FactoryForContext[cname]
-				}
-			}
-		}
-	}
-}
-
-// for now, only try to determine what namespaces a user can see
-func getContextAccess(factory *osclientcmd.Factory, ctxName string, ctx clientcmdapi.Context, logger *log.Logger) *ContextAccess {
-	// start by getting ready to log the result
-	msgText := "Testing client config context {{.context}}\nServer: {{.server}}\nUser: {{.user}}\n\n"
-	msg := log.Msg{"id": "discCCctx", "tmpl": msgText}
-	if config, err := factory.OpenShiftClientConfig.RawConfig(); err != nil {
-		logger.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err)
-		return nil
-	} else {
-		msg["context"] = ctxName
-		msg["server"] = config.Clusters[ctx.Cluster].Server
-		msg["user"] = ctx.AuthInfo
-	}
-	// actually go and request project list from the server
-	if osclient, _, err := factory.Clients(); err != nil {
-		logger.Errorf("discCCctxClients", "Failed to create client during discovery with error:\n(%T) %[1]v\nThis is probably an OpenShift bug.", err)
-		return nil
-	} else if projects, err := osclient.Projects().List(labels.Everything(), fields.Everything()); err == nil { // success!
-		list := projects.Items
-		if len(list) == 0 {
-			msg["tmpl"] = msgText + "Successfully requested project list, but it is empty, so user has no access to anything."
-			msg["projects"] = make([]string, 0)
-			logger.Infom("discCCctxSuccess", msg)
-			return nil
-		}
-		access := &ContextAccess{Projects: make([]string, len(list))}
-		for i, project := range list {
-			access.Projects[i] = project.Name
-			if project.Name == kapi.NamespaceDefault {
-				access.ClusterAdmin = true
-			}
-		}
-		if access.ClusterAdmin {
-			msg["tmpl"] = msgText + "Successfully requested project list; has access to default project, so assumed to be a cluster-admin"
-			logger.Infom("discCCctxSuccess", msg)
-		} else {
-			msg["tmpl"] = msgText + "Successfully requested project list; has access to project(s): {{.projectStr}}"
-			msg["projects"] = access.Projects
-			msg["projectStr"] = strings.Join(access.Projects, ", ")
-			logger.Infom("discCCctxSuccess", msg)
-		}
-		return access
-	} else { // something went wrong, so diagnose it
-		noResolveRx := regexp.MustCompile("dial tcp: lookup (\\S+): no such host")
-		unknownCaMsg := "x509: certificate signed by unknown authority"
-		unneededCaMsg := "specifying a root certificates file with the insecure flag is not allowed"
-		invalidCertNameRx := regexp.MustCompile("x509: certificate is valid for (\\S+, )+not (\\S+)")
-		connRefusedRx := regexp.MustCompile("dial tcp (\\S+): connection refused")
-		connTimeoutRx := regexp.MustCompile("dial tcp (\\S+): (?:connection timed out|i/o timeout)")
-		unauthenticatedMsg := `403 Forbidden: Forbidden: "/osapi/v1beta1/projects?namespace=" denied by default`
-		unauthorizedRx := regexp.MustCompile("401 Unauthorized: Unauthorized$")
-
-		malformedHTTPMsg := "malformed HTTP response"
-		malformedTLSMsg := "tls: oversized record received with length"
-
-		// interpret the error message for mere mortals
-		errm := err.Error()
-		var reason, errId string
-		switch {
-		case noResolveRx.MatchString(errm):
-			errId, reason = "clientNoResolve", `
-This usually means that the hostname does not resolve to an IP.
-Hostnames should usually be resolved via DNS or an /etc/hosts file.
-Ensure that the hostname resolves correctly from your host before proceeding.
-Of course, your config could also simply have the wrong hostname specified.
-`
-		case strings.Contains(errm, unknownCaMsg):
-			errId, reason = "clientUnknownCa", `
-This means that we cannot validate the certificate in use by the
-OpenShift API server, so we cannot securely communicate with it.
-Connections could be intercepted and your credentials stolen.
-
-Since the server certificate we see when connecting is not validated
-by public certificate authorities (CAs), you probably need to specify a
-certificate from a private CA to validate the connection.
-
-Your config may be specifying the wrong CA cert, or none, or there
-could actually be a man-in-the-middle attempting to intercept your
-connection. If you are unconcerned about any of this, you can add the
---insecure-skip-tls-verify flag to bypass secure (TLS) verification,
-but this is risky and should not be necessary.
-** Connections could be intercepted and your credentials stolen. **
-`
-		case strings.Contains(errm, unneededCaMsg):
-			errId, reason = "clientUnneededCa", `
-This means that for client connections to the OpenShift API server, you
-(or your kubeconfig) specified both a validating certificate authority
-and that the client should bypass connection security validation.
-
-This is not allowed because it is likely to be a mistake.
-
-If you want to use --insecure-skip-tls-verify to bypass security (which
-is usually a bad idea anyway), then you need to also clear the CA cert
-from your command line options or kubeconfig file(s). Of course, it
-would be far better to obtain and use a correct CA cert.
-`
-		case invalidCertNameRx.MatchString(errm):
-			match := invalidCertNameRx.FindStringSubmatch(errm)
-			serverHost := match[len(match)-1]
-			errId, reason = "clientInvCertName", fmt.Sprintf(`
-This means that the certificate in use by the OpenShift API server
-(master) does not match the hostname by which you are addressing it:
-  %s
-so a secure connection is not allowed. In theory, this *could* mean that
-someone is intercepting your connection and presenting a certificate
-that is valid but for a different server, which is why secure validation
-fails in this case.
-
-However, the most likely explanation is that the server certificate
-needs to be updated to include the name you are using to reach it.
-
-If the OpenShift server is generating its own certificates (which
-is default), then the --public-master flag on the OpenShift master is
-usually the easiest way to do this. If you need something more complicated
-(for instance, multiple public addresses for the API, or your own CA),
-then you will need to custom-generate the server certificate with the
-right names yourself.
-
-If you are unconcerned about any of this, you can add the
---insecure-skip-tls-verify flag to bypass secure (TLS) verification,
-but this is risky and should not be necessary.
-** Connections could be intercepted and your credentials stolen. **
-`, serverHost)
-		case connRefusedRx.MatchString(errm):
-			errId, reason = "clientInvCertName", `
-This means that when we tried to connect to the OpenShift API
-server (master), we reached the host, but nothing accepted the port
-connection. This could mean that the OpenShift master is stopped, or
-that a firewall or security policy is blocking access at that port.
-
-You will not be able to connect or do anything at all with OpenShift
-until this server problem is resolved or you specify a corrected
-server address.`
-		case connTimeoutRx.MatchString(errm):
-			errId, reason = "clientConnTimeout", `
-This means that when we tried to connect to the OpenShift API server
-(master), we could not reach the host at all.
-* You may have specified the wrong host address.
-* This could mean the host is completely unavailable (down).
-* This could indicate a routing problem or a firewall that simply
-  drops requests rather than responding by resetting the connection.
-* It does not generally mean that DNS name resolution failed (which
-  would be a different error) though the problem could be that it
-  gave the wrong address.`
-		case strings.Contains(errm, malformedHTTPMsg):
-			errId, reason = "clientMalformedHTTP", `
-This means that when we tried to connect to the OpenShift API server
-(master) with a plain HTTP connection, the server did not speak
-HTTP back to us. The most common explanation is that a secure server
-is listening but you specified an http: connection instead of https:.
-There could also be another service listening at the intended port
-speaking some other protocol entirely.
-
-You will not be able to connect or do anything at all with OpenShift
-until this server problem is resolved or you specify a corrected
-server address.`
-		case strings.Contains(errm, malformedTLSMsg):
-			errId, reason = "clientMalformedTLS", `
-This means that when we tried to connect to the OpenShift API server
-(master) with a secure HTTPS connection, the server did not speak
-HTTPS back to us. The most common explanation is that the server
-listening at that port is not the secure server you expected - it
-may be a non-secure HTTP server or the wrong service may be
-listening there, or you may have specified an incorrect port.
-
-You will not be able to connect or do anything at all with OpenShift
-until this server problem is resolved or you specify a corrected
-server address.`
-		case strings.Contains(errm, unauthenticatedMsg):
-			errId, reason = "clientUnauthn", `
-This means that when we tried to make a request to the OpenShift API
-server, your kubeconfig did not present valid credentials to
-authenticate your client. Credentials generally consist of a client
-key/certificate or an access token. Your kubeconfig may not have
-presented any, or they may be invalid.`
-		case unauthorizedRx.MatchString(errm):
-			errId, reason = "clientUnauthz", `
-This means that when we tried to make a request to the OpenShift API
-server, the request required credentials that were not presented.
-This can happen when an authentication token expires. Try logging in
-with this user again.`
-		default:
-			errId, reason = "clientUnknownConnErr", `Diagnostics does not have an explanation for what this means. Please report this error so one can be added.`
-		}
-		errMsg := fmt.Sprintf("(%T) %[1]v", err)
-		msg["tmpl"] = msgText + errMsg + reason
-		msg["errMsg"] = errMsg
-		logger.Errorm(errId, msg)
-	}
-	return nil
-}
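The removed diagnosis switch above boils down to classifying the client's raw error string against known patterns. For reference, here is a minimal standalone sketch of that pattern; the regex and message strings are taken from the removed code, while the function name and reduced return values are purely illustrative:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// classifyConnError is a hypothetical helper mirroring the removed switch:
// map a raw client error onto a stable id plus a short human explanation.
func classifyConnError(err error) (id, reason string) {
	noResolveRx := regexp.MustCompile(`dial tcp: lookup (\S+): no such host`)
	errm := err.Error()
	switch {
	case noResolveRx.MatchString(errm):
		return "clientNoResolve", "the hostname does not resolve to an IP"
	case strings.Contains(errm, "x509: certificate signed by unknown authority"):
		return "clientUnknownCa", "the server certificate cannot be validated"
	default:
		return "clientUnknownConnErr", "no explanation available; please report"
	}
}

func main() {
	id, reason := classifyConnError(fmt.Errorf("dial tcp: lookup master.example.com: no such host"))
	fmt.Println(id, "-", reason) // clientNoResolve - the hostname does not resolve to an IP
}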
deleted file mode 100644
@@ -1,19 +0,0 @@
-package discovery
-
-import (
-	"os/exec"
-	"runtime"
-)
-
-// ----------------------------------------------------------
-// Determine what we need to about the OS
-func (env *Environment) DiscoverOperatingSystem() {
-	if runtime.GOOS == "linux" {
-		if _, err := exec.LookPath("systemctl"); err == nil {
-			env.HasSystemd = true
-		}
-		if _, err := exec.LookPath("/bin/bash"); err == nil {
-			env.HasBash = true
-		}
-	}
-}
new file mode 100644
@@ -0,0 +1,46 @@
+package host
+
+import (
+	"errors"
+
+	configapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
+	configvalidation "github.com/openshift/origin/pkg/cmd/server/api/validation"
+	"github.com/openshift/origin/pkg/diagnostics/types"
+)
+
+// MasterConfigCheck
+type MasterConfigCheck struct {
+	MasterConfigFile string
+}
+
+func (d MasterConfigCheck) Name() string {
+	return "MasterConfigCheck"
+}
+
+func (d MasterConfigCheck) Description() string {
+	return "Check the master config file"
+}
+func (d MasterConfigCheck) CanRun() (bool, error) {
+	if len(d.MasterConfigFile) == 0 {
+		return false, errors.New("must have master config file")
+	}
+
+	return true, nil
+}
+func (d MasterConfigCheck) Check() *types.DiagnosticResult {
+	r := types.NewDiagnosticResult("MasterConfigCheck")
+
+	r.Debugf("discMCfile", "Looking for master config file at '%s'", d.MasterConfigFile)
+	masterConfig, err := configapilatest.ReadAndResolveMasterConfig(d.MasterConfigFile)
+	if err != nil {
+		r.Errorf("discMCfail", err, "Could not read master config file '%s':\n(%T) %[2]v", d.MasterConfigFile, err)
+		return r
+	}
+
+	r.Infof("discMCfound", "Found a master config file: %[1]s", d.MasterConfigFile)
+
+	for _, err := range configvalidation.ValidateMasterConfig(masterConfig).Errors {
+		r.Errorf("discMCinvalid", err, "Validation of master config file '%s' failed:\n(%T) %[2]v", d.MasterConfigFile, err)
+	}
+	return r
+}
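Checks like this one plug into the types.Diagnostic interface introduced later in this change (Name/Description/CanRun/Check returning a result object). As a minimal sketch of how a caller could drive one, assuming a host package holding the check above (the runner itself and the host import path are illustrative, not code from this change):

package main

import (
	"fmt"

	"github.com/openshift/origin/pkg/diagnostics/host" // assumed location of MasterConfigCheck
	"github.com/openshift/origin/pkg/diagnostics/types"
)

func main() {
	var d types.Diagnostic = host.MasterConfigCheck{MasterConfigFile: "/etc/openshift/master.yaml"}

	if ok, err := d.CanRun(); !ok {
		fmt.Printf("skipping %s: %v\n", d.Name(), err)
		return
	}

	result := d.Check() // diagnostics now return a result instead of logging directly
	for _, entry := range result.Logs() {
		fmt.Println(entry.Message.String())
	}
	if result.Failure() {
		fmt.Printf("%s failed with %d error(s)\n", d.Name(), len(result.Errors()))
	}
}

Because Check only fills in a DiagnosticResult, a runner like this is free to execute several diagnostics concurrently and merge their results afterward, which is the stated motivation for the refactor.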
new file mode 100644
@@ -0,0 +1,45 @@
+package host
+
+import (
+	"errors"
+
+	configapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
+	configvalidation "github.com/openshift/origin/pkg/cmd/server/api/validation"
+	"github.com/openshift/origin/pkg/diagnostics/types"
+)
+
+// NodeConfigCheck
+type NodeConfigCheck struct {
+	NodeConfigFile string
+}
+
+func (d NodeConfigCheck) Name() string {
+	return "NodeConfigCheck"
+}
+
+func (d NodeConfigCheck) Description() string {
+	return "Check the node config file"
+}
+func (d NodeConfigCheck) CanRun() (bool, error) {
+	if len(d.NodeConfigFile) == 0 {
+		return false, errors.New("must have node config file")
+	}
+
+	return true, nil
+}
+func (d NodeConfigCheck) Check() *types.DiagnosticResult {
+	r := types.NewDiagnosticResult("NodeConfigCheck")
+	r.Debugf("discNCfile", "Looking for node config file at '%s'", d.NodeConfigFile)
+	nodeConfig, err := configapilatest.ReadAndResolveNodeConfig(d.NodeConfigFile)
+	if err != nil {
+		r.Errorf("discNCfail", err, "Could not read node config file '%s':\n(%T) %[2]v", d.NodeConfigFile, err)
+		return r
+	}
+
+	r.Infof("discNCfound", "Found a node config file: %[1]s", d.NodeConfigFile)
+
+	for _, err := range configvalidation.ValidateNodeConfig(nodeConfig) {
+		r.Errorf("discNCinvalid", err, "Validation of node config file '%s' failed:\n(%T) %[2]v", d.NodeConfigFile, err)
+	}
+	return r
+}
@@ -7,8 +7,11 @@ import (
	ct "github.com/daviddengcn/go-colortext"
	"io"
	"io/ioutil"
+	"runtime"
	"strings"
	"text/template"
+
+	"github.com/openshift/origin/pkg/version"
)

type LoggerOptions struct {
@@ -35,6 +38,14 @@ type Level struct {
	Bright bool
}

+func (l Level) MarshalJSON() ([]byte, error) {
+	return []byte(`"` + l.Name + `"`), nil
+}
+
+func (l Level) MarshalYAML() (interface{}, error) {
+	return l.Name, nil
+}
+
type Logger struct {
	loggerType
	level Level
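With these marshalers a Level serializes as just its name rather than as a struct, which keeps the machine-readable json/yaml output stable for automation. A quick standalone illustration; the three-field Level here is a reduced stand-in for the full type shown above, not the real one:

package main

import (
	"encoding/json"
	"fmt"
)

// Minimal stand-in for the diagnostics log.Level (real type also has Color and Bright).
type Level struct {
	Level  int
	Name   string
	Prefix string
}

// Serialize a Level as its bare name, exactly as the added MarshalJSON does.
func (l Level) MarshalJSON() ([]byte, error) {
	return []byte(`"` + l.Name + `"`), nil
}

func main() {
	warn := Level{3, "warn", "WARN: "}
	out, _ := json.Marshal(struct {
		Level Level `json:"level"`
	}{warn})
	fmt.Println(string(out)) // {"level":"warn"}
}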
@@ -44,7 +55,7 @@ type Logger struct {

// Internal type to deal with different log formats
type loggerType interface {
-	Write(LogEntry)
+	Write(Entry)
	Finish()
}

@@ -85,18 +96,23 @@ func NewLogger(setLevel int, setFormat string, out io.Writer) (*Logger, error) {
}

type Message struct {
-	ID       string
-	Template string
-
+	// ID: an identifier unique to the message being logged, intended for json/yaml output
+	// so that automation can recognize specific messages without trying to parse them.
+	ID string `json:"-" yaml:"-"`
+	// Template: a template string as understood by text/template that can use any of the
+	// TemplateData entries in this Message as inputs.
+	Template string `json:"-" yaml:"-"`
	// TemplateData is passed to template executor to complete the message
-	TemplateData interface{}
+	TemplateData interface{} `json:"data,omitempty" yaml:"data,omitempty"`

-	EvaluatedText string
+	EvaluatedText string `json:"text" yaml:"text"` // human-readable message text
}

+type Hash map[string]interface{} // convenience/cosmetic type
+
func (m Message) String() string {
	if len(m.EvaluatedText) > 0 {
-		return fmt.Sprintf("%s: %s", m.EvaluatedText)
+		return m.EvaluatedText
	}

	if len(m.Template) == 0 {
@@ -105,7 +121,7 @@ func (m Message) String() string {

	// if given a template, convert it to text
	parsedTmpl, err := template.New(m.ID).Parse(m.Template)
-	if err != nil {
+	if err != nil { // unless the template is broken of course
		return fmt.Sprintf("%s: %s %#v: %v", m.ID, m.Template, m.TemplateData, err)
	}

@@ -118,23 +134,13 @@ func (m Message) String() string {
	return buff.String()
}

-type LogEntry struct {
-	Level Level
-	Message
+type Entry struct {
+	ID      string `json:"id"`
+	Origin  string `json:"origin"`
+	Level   Level  `json:"level"`
+	Message `yaml:"-,inline"`
}

-/* a Msg can be expected to have the following entries:
- * "id": an identifier unique to the message being logged, intended for json/yaml output
- *       so that automation can recognize specific messages without trying to parse them.
- * "text": human-readable message text
- * "tmpl": a template string as understood by text/template that can use any of the other
- *         entries in this Msg as inputs. This is removed, evaluated, and the result is
- *         placed in "text". If there is an error during evaluation, the error is placed
- *         in "templateErr", the original id of the message is stored in "templateId",
- *         and the Msg id is changed to "tmplErr". Of course, this should never happen
- *         if there are no mistakes in the calling code.
- */
-
var (
	ErrorLevel = Level{4, "error", "ERROR: ", ct.Red, true}    // Something is definitely wrong
	WarnLevel  = Level{3, "warn", "WARN: ", ct.Yellow, true}   // Likely to be an issue but maybe not
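A Message now carries either pre-evaluated text or a template plus data that gets rendered lazily via text/template. The rendering step itself is ordinary library usage; here is a self-contained sketch of the same parse-then-execute sequence (names and sample data are illustrative):

package main

import (
	"bytes"
	"fmt"
	"text/template"
)

func main() {
	// Mirrors how a templated diagnostics message is evaluated: parse, then execute with data.
	tmpl := "Found '{{.unit}}' journald log message:\n  {{.logMsg}}"
	data := map[string]interface{}{"unit": "openshift-node", "logMsg": "TLS handshake error"}

	parsed, err := template.New("sdExample").Parse(tmpl)
	if err != nil {
		fmt.Println("broken template:", err) // corresponds to the templateParseErr path below
		return
	}
	var buff bytes.Buffer
	if err := parsed.Execute(&buff, data); err != nil {
		fmt.Println("execution failed:", err) // corresponds to the templateExecErr path below
		return
	}
	fmt.Println(buff.String())
}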
@@ -144,136 +150,126 @@ var (
)

// Provide a summary at the end
-func (l *Logger) Summary() {
-	l.Notice("summary", "\nSummary of diagnostics execution:\n")
-	if l.warningsSeen > 0 {
-		l.Noticef("sumWarn", "Warnings seen: %d", l.warningsSeen)
+func (l *Logger) Summary(warningsSeen int, errorsSeen int) {
+	l.Noticef("summary", "\nSummary of diagnostics execution (version %v):\n", version.Get())
+	if warningsSeen > 0 {
+		l.Noticet("sumWarn", "Warnings seen: {{.warnings}}", Hash{"warnings": warningsSeen})
	}
-	if l.errorsSeen > 0 {
-		l.Noticef("sumErr", "Errors seen: %d", l.errorsSeen)
+	if errorsSeen > 0 {
+		l.Noticet("sumErr", "Errors seen: {{.errors}}", Hash{"errors": errorsSeen})
	}
-	if l.warningsSeen == 0 && l.errorsSeen == 0 {
+	if warningsSeen == 0 && errorsSeen == 0 {
		l.Notice("sumNone", "Completed with no errors or warnings seen.")
	}
}

-func (l *Logger) LogMessage(level Level, message Message) {
-	// if there's no logger, return silently
-	if l == nil {
+func (l *Logger) LogEntry(entry Entry) {
+	if l == nil { // if there's no logger, return silently
		return
	}
-
-	// track how many of every type we've seen (probably unnecessary)
-	if level.Level == ErrorLevel.Level {
-		l.errorsSeen += 1
-	} else if level.Level == WarnLevel.Level {
-		l.warningsSeen += 1
-	}
-
-	if level.Level < l.level.Level {
-		return
-	}
-
-	if len(message.Template) == 0 {
-		l.Write(LogEntry{level, message})
+	if entry.Level.Level < l.level.Level { // logging level says skip this entry
		return
	}

-	// if given a template, convert it to text
-	parsedTmpl, err := template.New(message.ID).Parse(message.Template)
-	if err != nil {
-		templateErrorMessage := Message{
-			ID: "templateParseErr",
-			TemplateData: map[string]interface{}{
-				"error":           err.Error(),
-				"originalMessage": message,
-			},
+	if msg := &entry.Message; msg.EvaluatedText == "" && msg.Template != "" {
+		// if given a template instead of text, convert it to text
+		parsedTmpl, err := template.New(msg.ID).Parse(msg.Template)
+		if err != nil {
+			entry.Message = Message{
+				ID: "templateParseErr",
+				TemplateData: Hash{
+					"error":           err.Error(),
+					"originalMessage": msg,
+				},
+				EvaluatedText: fmt.Sprintf("Error parsing template for %s:\n%s=== Error was:\n%v\nOriginal message:\n%#v", msg.ID, msg.Template, err, msg),
+			}
+			entry.ID = entry.Message.ID
+			l.Write(entry)
+			return
		}
-		l.LogMessage(level, templateErrorMessage)
-		return
-	}

-	var buff bytes.Buffer
-	err = parsedTmpl.Execute(&buff, message.TemplateData)
-	if err != nil {
-		templateErrorMessage := Message{
-			ID: "templateParseErr",
-			TemplateData: map[string]interface{}{
-				"error":           err.Error(),
-				"originalMessage": message,
-			},
+		var buff bytes.Buffer
+		err = parsedTmpl.Execute(&buff, msg.TemplateData)
+		if err != nil {
+			entry.Message = Message{
+				ID: "templateExecErr",
+				TemplateData: Hash{
+					"error":           err.Error(),
+					"originalMessage": msg,
+				},
+				EvaluatedText: fmt.Sprintf("Error executing template for %s:\n%s=== Error was:\n%v\nOriginal message:\n%#v", msg.ID, msg.Template, err, msg),
+			}
+			entry.ID = entry.Message.ID
+			l.Write(entry)
+			return
		}
-		l.LogMessage(level, templateErrorMessage)
-		return

+		msg.EvaluatedText = buff.String()
	}

-	message.EvaluatedText = buff.String()
-	l.Write(LogEntry{level, message})
+	l.Write(entry)
}

// Convenience functions
func (l *Logger) Error(id string, text string) {
-	l.Logp(ErrorLevel, id, text)
+	l.logp(ErrorLevel, id, text)
}
func (l *Logger) Errorf(id string, msg string, a ...interface{}) {
-	l.Logpf(ErrorLevel, id, msg, a...)
+	l.logf(ErrorLevel, id, msg, a...)
}
-func (l *Logger) Errorm(message Message) {
-	l.LogMessage(ErrorLevel, message)
+func (l *Logger) Errort(id string, template string, data interface{}) {
+	l.logt(ErrorLevel, id, template, data)
}
func (l *Logger) Warn(id string, text string) {
-	l.Logp(WarnLevel, id, text)
+	l.logp(WarnLevel, id, text)
}
func (l *Logger) Warnf(id string, msg string, a ...interface{}) {
-	l.Logpf(WarnLevel, id, msg, a...)
-}
-func (l *Logger) Warnm(message Message) {
-	l.LogMessage(WarnLevel, message)
+	l.logf(WarnLevel, id, msg, a...)
}
func (l *Logger) Info(id string, text string) {
-	l.Logp(InfoLevel, id, text)
+	l.logp(InfoLevel, id, text)
}
func (l *Logger) Infof(id string, msg string, a ...interface{}) {
-	l.Logpf(InfoLevel, id, msg, a...)
-}
-func (l *Logger) Infom(message Message) {
-	l.LogMessage(InfoLevel, message)
+	l.logf(InfoLevel, id, msg, a...)
}
func (l *Logger) Notice(id string, text string) {
-	l.Logp(NoticeLevel, id, text)
+	l.logp(NoticeLevel, id, text)
}
func (l *Logger) Noticef(id string, msg string, a ...interface{}) {
-	l.Logpf(NoticeLevel, id, msg, a...)
+	l.logf(NoticeLevel, id, msg, a...)
}
-func (l *Logger) Noticem(message Message) {
-	l.LogMessage(NoticeLevel, message)
+func (l *Logger) Noticet(id string, template string, data interface{}) {
+	l.logt(NoticeLevel, id, template, data)
}
func (l *Logger) Debug(id string, text string) {
-	l.Logp(DebugLevel, id, text)
+	l.logp(DebugLevel, id, text)
}
func (l *Logger) Debugf(id string, msg string, a ...interface{}) {
-	l.Logpf(DebugLevel, id, msg, a...)
-}
-func (l *Logger) Debugm(message Message) {
-	l.LogMessage(DebugLevel, message)
+	l.logf(DebugLevel, id, msg, a...)
}

-func (l *Logger) Logp(level Level, id string, text string) {
-	l.LogMessage(level, Message{ID: id, EvaluatedText: text})
+func origin(skip int) string {
+	if _, file, _, ok := runtime.Caller(skip + 1); ok {
+		paths := strings.SplitAfter(file, "github.com/")
+		return "controller " + paths[len(paths)-1]
+	} else {
+		return "unknown"
+	}
+}
+func (l *Logger) logp(level Level, id string, text string) {
+	l.LogEntry(Entry{id, origin(1), level, Message{ID: id, EvaluatedText: text}})
}
-func (l *Logger) Logpf(level Level, id string, msg string, a ...interface{}) {
-	l.Logp(level, id, fmt.Sprintf(msg, a...))
+func (l *Logger) logf(level Level, id string, msg string, a ...interface{}) {
+	l.LogEntry(Entry{id, origin(1), level, Message{ID: id, EvaluatedText: fmt.Sprintf(msg, a...)}})
+}
+func (l *Logger) logt(level Level, id string, template string, data interface{}) {
+	l.LogEntry(Entry{id, origin(1), level, Message{ID: id, Template: template, TemplateData: data}})
}

func (l *Logger) Finish() {
	l.loggerType.Finish()
}

-func (l *Logger) ErrorsSeen() bool {
-	return l.errorsSeen > 0
-}
-
// turn excess lines into [...]
func LimitLines(msg string, n int) string {
	lines := strings.SplitN(msg, "\n", n+1)
@@ -31,11 +31,14 @@ func IsTerminal(w io.Writer) bool {
	return ok && term.IsTerminal(file.Fd())
}

-func (t *textLogger) Write(entry LogEntry) {
+func (t *textLogger) Write(entry Entry) {
	if t.ttyOutput {
		ct.ChangeColor(entry.Level.Color, entry.Level.Bright, ct.None, false)
	}
-	text := strings.TrimSpace(entry.EvaluatedText)
+	text := strings.TrimSpace(entry.Message.EvaluatedText)
+	if entry.Level.Level >= WarnLevel.Level {
+		text = fmt.Sprintf("[ID \"%s\" from %s]\n", entry.ID, entry.Origin) + text
+	}
	if strings.Contains(text, "\n") { // separate multiline comments with newlines
		if !t.lastNewline {
			fmt.Fprintln(t.out) // separate from previous one-line log msg
deleted file mode 100644
@@ -1,48 +0,0 @@
-package master
-
-import (
-	"errors"
-
-	configapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
-	configvalidation "github.com/openshift/origin/pkg/cmd/server/api/validation"
-	"github.com/openshift/origin/pkg/diagnostics/log"
-)
-
-// MasterConfigCheck
-type MasterConfigCheck struct {
-	MasterConfigFile string
-
-	Log *log.Logger
-}
-
-func (d MasterConfigCheck) Description() string {
-	return "Check the master config file"
-}
-func (d MasterConfigCheck) CanRun() (bool, error) {
-	if len(d.MasterConfigFile) == 0 {
-		return false, errors.New("must have master config file")
-	}
-
-	return true, nil
-}
-func (d MasterConfigCheck) Check() (bool, []log.Message, []error, []error) {
-	if _, err := d.CanRun(); err != nil {
-		return false, nil, nil, []error{err}
-	}
-
-	d.Log.Debugf("discMCfile", "Looking for master config file at '%s'", d.MasterConfigFile)
-	masterConfig, err := configapilatest.ReadAndResolveMasterConfig(d.MasterConfigFile)
-	if err != nil {
-		d.Log.Errorf("discMCfail", "Could not read master config file '%s':\n(%T) %[2]v", d.MasterConfigFile, err)
-
-		return false, nil, nil, []error{err}
-	}
-
-	d.Log.Infof("discMCfound", "Found a master config file:\n%[1]s", d.MasterConfigFile)
-
-	if validationResults := configvalidation.ValidateMasterConfig(masterConfig); len(validationResults.Errors) > 0 {
-		return false, nil, nil, validationResults.Errors
-	}
-
-	return true, nil, nil, nil
-}
deleted file mode 100644
@@ -1,48 +0,0 @@
-package node
-
-import (
-	"errors"
-
-	configapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
-	configvalidation "github.com/openshift/origin/pkg/cmd/server/api/validation"
-	"github.com/openshift/origin/pkg/diagnostics/log"
-)
-
-// NodeConfigCheck
-type NodeConfigCheck struct {
-	NodeConfigFile string
-
-	Log *log.Logger
-}
-
-func (d NodeConfigCheck) Description() string {
-	return "Check the node config file"
-}
-func (d NodeConfigCheck) CanRun() (bool, error) {
-	if len(d.NodeConfigFile) == 0 {
-		return false, errors.New("must have node config file")
-	}
-
-	return true, nil
-}
-func (d NodeConfigCheck) Check() (bool, []log.Message, []error, []error) {
-	if _, err := d.CanRun(); err != nil {
-		return false, nil, nil, []error{err}
-	}
-
-	d.Log.Debugf("discNCfile", "Looking for node config file at '%s'", d.NodeConfigFile)
-	nodeConfig, err := configapilatest.ReadAndResolveNodeConfig(d.NodeConfigFile)
-	if err != nil {
-		d.Log.Errorf("discNCfail", "Could not read node config file '%s':\n(%T) %[2]v", d.NodeConfigFile, err)
-
-		return false, nil, nil, []error{err}
-	}
-
-	d.Log.Infof("discNCfound", "Found a node config file:\n%[1]s", d.NodeConfigFile)
-
-	if validationErrors := configvalidation.ValidateNodeConfig(nodeConfig); len(validationErrors) > 0 {
-		return false, nil, nil, validationErrors
-	}
-
-	return true, nil, nil, nil
-}
@@ -3,38 +3,44 @@ package systemd
import (
	"bufio"
	"encoding/json"
-	"fmt"
	"io"
	"os/exec"
+	"strconv"
+	"time"

	"github.com/openshift/origin/pkg/diagnostics/log"
	"github.com/openshift/origin/pkg/diagnostics/types"
-	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
+)
+
+const (
+	sdLogReadErr = `Diagnostics failed to query journalctl for the '%s' unit logs.
+This should be very unusual, so please report this error:
+%s`
)

// AnalyzeLogs
type AnalyzeLogs struct {
	SystemdUnits map[string]types.SystemdUnit
+}

-	Log *log.Logger
+func (d AnalyzeLogs) Name() string {
+	return "AnalyzeLogs"
}

func (d AnalyzeLogs) Description() string {
-	return "Check for problems in systemd service logs since each service last started"
+	return "Check for recent problems in systemd service logs"
}
+
func (d AnalyzeLogs) CanRun() (bool, error) {
	return true, nil
}
-func (d AnalyzeLogs) Check() (bool, []log.Message, []error, []error) {
-	infos := []log.Message{}
-	warnings := []error{}
-	errors := []error{}
+
+func (d AnalyzeLogs) Check() *types.DiagnosticResult {
+	r := types.NewDiagnosticResult("AnalyzeLogs")

	for _, unit := range unitLogSpecs {
		if svc := d.SystemdUnits[unit.Name]; svc.Enabled || svc.Active {
-			checkMessage := log.Message{ID: "sdCheckLogs", EvaluatedText: fmt.Sprintf("Checking journalctl logs for '%s' service", unit.Name)}
-			d.Log.LogMessage(log.InfoLevel, checkMessage)
-			infos = append(infos, checkMessage)
+			r.Infof("sdCheckLogs", "Checking journalctl logs for '%s' service", unit.Name)

			cmd := exec.Command("journalctl", "-ru", unit.Name, "--output=json")
			// JSON comes out of journalctl one line per record
@@ -50,60 +56,54 @@ func (d AnalyzeLogs) Check() (bool, []log.Message, []error, []error) {
			}(cmd)

			if err != nil {
-				diagnosticError := diagnostic.NewDiagnosticError("sdLogReadErr", fmt.Sprintf(sdLogReadErr, unit.Name, errStr(err)), err)
-				d.Log.Error(diagnosticError.ID, diagnosticError.Explanation)
-				errors = append(errors, diagnosticError)
-
-				return false, infos, warnings, errors
+				r.Errorf("sdLogReadErr", err, sdLogReadErr, unit.Name, errStr(err))
+				return r
			}
			defer func() { // close out pipe once done reading
				reader.Close()
				cmd.Wait()
			}()
-			entryTemplate := logEntry{Message: `json:"MESSAGE"`}
+			timeLimit := time.Now().Add(-time.Hour) // if it didn't happen in the last hour, probably not too relevant
			matchCopy := append([]logMatcher(nil), unit.LogMatchers...) // make a copy, will remove matchers after they match something
-			for lineReader.Scan() { // each log entry is a line
+			lineCount := 0 // each log entry is a line
+			for lineReader.Scan() {
+				lineCount += 1
				if len(matchCopy) == 0 { // if no rules remain to match
					break // don't waste time reading more log entries
				}
-				bytes, entry := lineReader.Bytes(), entryTemplate
+				bytes, entry := lineReader.Bytes(), logEntry{}
				if err := json.Unmarshal(bytes, &entry); err != nil {
-					badJSONMessage := log.Message{ID: "sdLogBadJSON", EvaluatedText: fmt.Sprintf("Couldn't read the JSON for this log message:\n%s\nGot error %s", string(bytes), errStr(err))}
-					d.Log.LogMessage(log.DebugLevel, badJSONMessage)
-
+					r.Debugf("sdLogBadJSON", "Couldn't read the JSON for this log message:\n%s\nGot error %s", string(bytes), errStr(err))
				} else {
+					if lineCount > 500 && stampTooOld(entry.TimeStamp, timeLimit) {
+						r.Debugf("sdLogTrunc", "Stopped reading %s log: timestamp %s too old", unit.Name, entry.TimeStamp)
+						break // if we've analyzed at least 500 entries, stop when age limit reached (don't scan days of logs)
+					}
					if unit.StartMatch.MatchString(entry.Message) {
-						break // saw the log message where the unit started; done looking.
+						break // saw log message for unit startup; don't analyze previous logs
					}
					for index, match := range matchCopy { // match log message against provided matchers
						if strings := match.Regexp.FindStringSubmatch(entry.Message); strings != nil {
							// if matches: print interpretation, remove from matchCopy, and go on to next log entry
-							keep := match.KeepAfterMatch
-							if match.Interpret != nil {
-								currKeep, currInfos, currWarnings, currErrors := match.Interpret(d.Log, &entry, strings)
+							keep := match.KeepAfterMatch // generic keep logic
+							if match.Interpret != nil { // apply custom match logic
+								currKeep, result := match.Interpret(&entry, strings)
								keep = currKeep
-								infos = append(infos, currInfos...)
-								warnings = append(warnings, currWarnings...)
-								errors = append(errors, currErrors...)
-
-							} else {
-								text := fmt.Sprintf("Found '%s' journald log message:\n  %s\n", unit.Name, entry.Message) + match.Interpretation
-								message := log.Message{ID: match.Id, EvaluatedText: text, TemplateData: map[string]string{"unit": unit.Name, "logMsg": entry.Message}}
-								d.Log.LogMessage(match.Level, message)
-								diagnosticError := diagnostic.NewDiagnosticError(match.Id, text, nil)
+								r.Append(result)
+							} else { // apply generic match processing
+								template := "Found '{{.unit}}' journald log message:\n  {{.logMsg}}\n{{.interpretation}}"
+								templateData := log.Hash{"unit": unit.Name, "logMsg": entry.Message, "interpretation": match.Interpretation}

								switch match.Level {
-								case log.InfoLevel, log.NoticeLevel:
-									infos = append(infos, message)
-
+								case log.DebugLevel:
+									r.Debugt(match.Id, template, templateData)
+								case log.InfoLevel:
+									r.Infot(match.Id, template, templateData)
								case log.WarnLevel:
-									warnings = append(warnings, diagnosticError)
-
+									r.Warnt(match.Id, nil, template, templateData)
								case log.ErrorLevel:
-									errors = append(errors, diagnosticError)
-
+									r.Errort(match.Id, nil, template, templateData)
								}
-
							}

							if !keep { // remove matcher once seen
@@ -118,11 +118,12 @@ func (d AnalyzeLogs) Check() (bool, []log.Message, []error, []error) {
		}
	}

-	return (len(errors) == 0), infos, warnings, errors
+	return r
}

-const (
-	sdLogReadErr = `Diagnostics failed to query journalctl for the '%s' unit logs.
-This should be very unusual, so please report this error:
-%s`
-)
+func stampTooOld(stamp string, timeLimit time.Time) bool {
+	if epochUsec, err := strconv.ParseInt(stamp, 10, 64); err == nil { // journald stamps are microseconds since epoch
+		return time.Unix(epochUsec/1000000, 0).Before(timeLimit)
+	}
+	return true // something went wrong, stop looking...
+}
@@ -12,7 +12,7 @@ func GetSystemdUnits(logger *log.Logger) map[string]types.SystemdUnit {
	systemdUnits := map[string]types.SystemdUnit{}

-	logger.Notice("discBegin", "Beginning systemd discovery")
+	logger.Notice("discBeginSysd", "Performing systemd discovery")
	for _, name := range []string{"openshift", "openshift-master", "openshift-node", "openshift-sdn-master", "openshift-sdn-node", "docker", "openvswitch", "iptables", "etcd", "kubernetes"} {
		systemdUnits[name] = discoverSystemdUnit(logger, name)
@@ -5,11 +5,12 @@ import (

	"fmt"
	"github.com/openshift/origin/pkg/diagnostics/log"
-	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
+	"github.com/openshift/origin/pkg/diagnostics/types"
)

type logEntry struct {
-	Message string // I feel certain we will want more fields at some point
+	Message   string `json:"MESSAGE"`
+	TimeStamp string `json:"__REALTIME_TIMESTAMP"` // microseconds since epoch
}

type logMatcher struct { // regex for scanning log messages and interpreting them when found
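journalctl's --output=json emits one JSON object per record, with the message text under MESSAGE and the wall-clock stamp under __REALTIME_TIMESTAMP as a string of microseconds since the epoch. A standalone sketch of the decode-and-age-check step the analyzer relies on; the sample record is fabricated:

package main

import (
	"encoding/json"
	"fmt"
	"strconv"
	"time"
)

type logEntry struct {
	Message   string `json:"MESSAGE"`
	TimeStamp string `json:"__REALTIME_TIMESTAMP"` // microseconds since epoch
}

func stampTooOld(stamp string, limit time.Time) bool {
	if us, err := strconv.ParseInt(stamp, 10, 64); err == nil {
		return time.Unix(us/1000000, 0).Before(limit) // convert microseconds to seconds
	}
	return true // unparseable stamp: treat as too old and stop looking
}

func main() {
	// One fabricated journald record, shaped like `journalctl --output=json` output.
	line := []byte(`{"MESSAGE":"Starting an OpenShift node","__REALTIME_TIMESTAMP":"1425000000000000"}`)

	var entry logEntry
	if err := json.Unmarshal(line, &entry); err != nil {
		fmt.Println("bad JSON:", err)
		return
	}
	fmt.Println(entry.Message, stampTooOld(entry.TimeStamp, time.Now().Add(-time.Hour)))
}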
@@ -19,10 +20,9 @@ type logMatcher struct { // regex for scanning log messages and interpreting them when found
	Interpretation string // log with above level+id if it's simple
	KeepAfterMatch bool   // usually note only first matched entry, ignore rest
	Interpret      func( // run this for custom logic on match
-		logger *log.Logger,
		entry *logEntry,
		matches []string,
-	) (bool, []log.Message, []error, []error) // KeepAfterMatch?
+	) (bool /* KeepAfterMatch? */, *types.DiagnosticResult)
}

type unitSpec struct {
@@ -32,8 +32,8 @@ type unitSpec struct {
}

//
-// -------- Things that feed into the diagnostics definitions -----------
-// Search for Diagnostics for the actual diagnostics.
+// -------- These are things that feed into the diagnostics definitions -----------
+//

// Reusable log matchers:
var badImageTemplate = logMatcher{
@@ -81,15 +81,15 @@ logs after the node is actually available.`,
		// TODO: don't rely on ipv4 format, should be ipv6 "soon"
		Regexp: regexp.MustCompile("http: TLS handshake error from ([\\d.]+):\\d+: remote error: bad certificate"),
		Level:  log.WarnLevel,
-		Interpret: func(logger *log.Logger, entry *logEntry, matches []string) (bool, []log.Message, []error, []error) {
-			warnings := []error{}
+		Interpret: func(entry *logEntry, matches []string) (bool, *types.DiagnosticResult) {
+			r := types.NewDiagnosticResult("openshift-master.journald")

			client := matches[1]
			prelude := fmt.Sprintf("Found 'openshift-master' journald log message:\n  %s\n", entry.Message)
			if tlsClientErrorSeen == nil { // first time this message was seen
				tlsClientErrorSeen = map[string]bool{client: true}
				// TODO: too generic, adjust message depending on subnet of the "from" address
-				diagnosticError := diagnostic.NewDiagnosticError("sdLogOMreBadCert", prelude+`
+				r.Warn("sdLogOMreBadCert", nil, prelude+`
This error indicates that a client attempted to connect to the master
HTTPS API server but broke off the connection because the master's
certificate is not validated by a certificate authority (CA) acceptable
@@ -127,21 +127,13 @@ log message:
(so this message may simply indicate that the master generated a new
server certificate, e.g. to add a different --public-master, and a
browser hasn't accepted it yet and is still attempting API calls;
-try logging out of the console and back in again).`, nil)
-
-				message := log.Message{ID: diagnosticError.ID, EvaluatedText: diagnosticError.Explanation, TemplateData: map[string]string{"client": client}}
-				logger.LogMessage(log.WarnLevel, message)
-				warnings = append(warnings, diagnosticError)
+try logging out of the console and back in again).`)

			} else if !tlsClientErrorSeen[client] {
				tlsClientErrorSeen[client] = true
-				diagnosticError := diagnostic.NewDiagnosticError("sdLogOMreBadCert", prelude+`This message was diagnosed above, but for a different client address.`, nil)
-				message := log.Message{ID: diagnosticError.ID, EvaluatedText: diagnosticError.Explanation, TemplateData: map[string]string{"client": client}}
-				logger.LogMessage(log.WarnLevel, message)
-				warnings = append(warnings, diagnosticError)
-
+				r.Warn("sdLogOMreBadCert", nil, prelude+`This message was diagnosed above, but for a different client address.`)
			} // else, it's a repeat, don't mention it
-			return true, nil, warnings, nil // show once for every client failing to connect, not just the first
+			return true /* show once for every client failing to connect, not just the first */, r
		},
	},
	{
@@ -167,11 +159,6 @@ message for any node with this problem.
		},
	},
	{
-		Name:        "openshift-sdn-master",
-		StartMatch:  regexp.MustCompile("Starting OpenShift SDN Master"),
-		LogMatchers: []logMatcher{},
-	},
-	{
		Name:       "openshift-node",
		StartMatch: regexp.MustCompile("Starting an OpenShift node"),
		LogMatchers: []logMatcher{
@@ -236,25 +223,19 @@ to the .kubeconfig specified in /etc/sysconfig/openshift-node
This host will not function as a node until this is resolved. Pods
scheduled for this node will remain in pending or unknown state forever.`,
			},
-		},
-	},
-	{
-		Name:       "openshift-sdn-node",
-		StartMatch: regexp.MustCompile("Starting OpenShift SDN node"),
-		LogMatchers: []logMatcher{
			{
				Regexp: regexp.MustCompile("Could not find an allocated subnet for this minion.*Waiting.."),
				Level:  log.WarnLevel,
				Id:     "sdLogOSNnoSubnet",
				Interpretation: `
-This warning occurs when openshift-sdn-node is trying to request the
+This warning occurs when openshift-node is trying to request the
SDN subnet it should be configured with according to openshift-sdn-master,
but either can't connect to it ("All the given peers are not reachable")
or has not yet been assigned a subnet ("Key not found").

This can just be a matter of waiting for the master to become fully
available and define a record for the node (aka "minion") to use,
-and openshift-sdn-node will wait until that occurs, so the presence
+and openshift-node will wait until that occurs, so the presence
of this message in the node log isn't necessarily a problem as
long as the SDN is actually working, but this message may help indicate
the problem if it is not working.
@@ -262,8 +243,8 @@ the problem if it is not working.
If the master is available and this node's record is defined and this
message persists, then it may be a sign of a different misconfiguration.
Unfortunately the message is not specific about why the connection failed.
-Check MASTER_URL in /etc/sysconfig/openshift-sdn-node:
- * Is the protocol https? It should be http.
+Check the master's URL in the node configuration.
+ * Is the protocol http? It should be https.
 * Can you reach the address and port from the node using curl?
   ("404 page not found" is correct response)`,
			},
@@ -8,14 +8,15 @@ import (

	"github.com/openshift/origin/pkg/diagnostics/log"
	"github.com/openshift/origin/pkg/diagnostics/types"
-	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
)

// UnitStatus
type UnitStatus struct {
	SystemdUnits map[string]types.SystemdUnit
+}

-	Log *log.Logger
+func (d UnitStatus) Name() string {
+	return "UnitStatus"
}

func (d UnitStatus) Description() string {
@@ -30,67 +31,33 @@ func (d UnitStatus) CanRun() (bool, error) {

	return false, errors.New("systemd is not present on this host")
}
-func (d UnitStatus) Check() (bool, []log.Message, []error, []error) {
-	if _, err := d.CanRun(); err != nil {
-		return false, nil, nil, []error{err}
-	}
-
-	warnings := []error{}
-	errors := []error{}
-
-	unitWarnings, unitErrors := unitRequiresUnit(d.Log, d.SystemdUnits["openshift-node"], d.SystemdUnits["iptables"], nodeRequiresIPTables)
-	warnings = append(warnings, unitWarnings...)
-	errors = append(errors, unitErrors...)
+func (d UnitStatus) Check() *types.DiagnosticResult {
+	r := types.NewDiagnosticResult("UnitStatus")

-	unitWarnings, unitErrors = unitRequiresUnit(d.Log, d.SystemdUnits["openshift-node"], d.SystemdUnits["docker"], `OpenShift nodes use Docker to run containers.`)
-	warnings = append(warnings, unitWarnings...)
-	errors = append(errors, unitErrors...)
-
-	unitWarnings, unitErrors = unitRequiresUnit(d.Log, d.SystemdUnits["openshift"], d.SystemdUnits["docker"], `OpenShift nodes use Docker to run containers.`)
-	warnings = append(warnings, unitWarnings...)
-	errors = append(errors, unitErrors...)
-
-	// node's dependency on openvswitch is a special case.
-	// We do not need to enable ovs because openshift-node starts it for us.
-	if d.SystemdUnits["openshift-node"].Active && !d.SystemdUnits["openvswitch"].Active {
-		diagnosticError := diagnostic.NewDiagnosticError("sdUnitSDNreqOVS", sdUnitSDNreqOVS, nil)
-		d.Log.Error(diagnosticError.ID, diagnosticError.Explanation)
-		errors = append(errors, diagnosticError)
-	}
+	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["iptables"], nodeRequiresIPTables)
+	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["docker"], `OpenShift nodes use Docker to run containers.`)
+	unitRequiresUnit(r, d.SystemdUnits["openshift-node"], d.SystemdUnits["openvswitch"], sdUnitSDNreqOVS)
+	unitRequiresUnit(r, d.SystemdUnits["openshift-master"], d.SystemdUnits["openvswitch"], `OpenShift masters use openvswitch for access to cluster SDN networking`)
+	// all-in-one networking *could* be simpler, so fewer checks
+	unitRequiresUnit(r, d.SystemdUnits["openshift"], d.SystemdUnits["docker"], `OpenShift nodes use Docker to run containers.`)

	// Anything that is enabled but not running deserves notice
	for name, unit := range d.SystemdUnits {
		if unit.Enabled && !unit.Active {
-			diagnosticError := diagnostic.NewDiagnosticErrorFromTemplate("sdUnitInactive", sdUnitInactive, map[string]string{"unit": name})
-			d.Log.LogMessage(log.ErrorLevel, *diagnosticError.LogMessage)
-			errors = append(errors, diagnosticError)
+			r.Errort("sdUnitInactive", nil, sdUnitInactive, log.Hash{"unit": name})
		}
	}
-
-	return (len(errors) == 0), nil, warnings, errors
+	return r
}

-func unitRequiresUnit(logger *log.Logger, unit types.SystemdUnit, requires types.SystemdUnit, reason string) ([]error, []error) {
-	templateData := map[string]string{"unit": unit.Name, "required": requires.Name, "reason": reason}
+func unitRequiresUnit(r *types.DiagnosticResult, unit types.SystemdUnit, requires types.SystemdUnit, reason string) {
+	templateData := log.Hash{"unit": unit.Name, "required": requires.Name, "reason": reason}

	if (unit.Active || unit.Enabled) && !requires.Exists {
-		diagnosticError := diagnostic.NewDiagnosticErrorFromTemplate("sdUnitReqLoaded", sdUnitReqLoaded, templateData)
-		logger.LogMessage(log.ErrorLevel, *diagnosticError.LogMessage)
-		return nil, []error{diagnosticError}
-
+		r.Errort("sdUnitReqLoaded", nil, sdUnitReqLoaded, templateData)
	} else if unit.Active && !requires.Active {
-		diagnosticError := diagnostic.NewDiagnosticErrorFromTemplate("sdUnitReqActive", sdUnitReqActive, templateData)
-		logger.LogMessage(log.ErrorLevel, *diagnosticError.LogMessage)
-		return nil, []error{diagnosticError}
-
-	} else if unit.Enabled && !requires.Enabled {
-		diagnosticError := diagnostic.NewDiagnosticErrorFromTemplate("sdUnitReqEnabled", sdUnitReqEnabled, templateData)
-		logger.LogMessage(log.WarnLevel, *diagnosticError.LogMessage)
-		return []error{diagnosticError}, nil
-
+		r.Errort("sdUnitReqActive", nil, sdUnitReqActive, templateData)
	}
-
-	return nil, nil
}

func errStr(err error) string {
@@ -156,12 +123,4 @@ To ensure it is not failing to run, check the status and logs with:
  # systemctl status {{.required}}
  # journalctl -ru {{.required}}
`
-
-	sdUnitReqEnabled = `
-systemd unit {{.unit}} is enabled to run automatically at boot, but {{.required}} is not.
-{{.reason}}
-An administrator can enable the {{.required}} unit with:
-
-  # systemctl enable {{.required}}
-	`
)
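unitRequiresUnit now reports straight into the shared result instead of returning warning/error slices. For reference, a standalone sketch of the dependency decision it makes, with the unit data faked (SystemdUnit here is a reduced stand-in for types.SystemdUnit, and the helper name is illustrative):

package main

import "fmt"

// Stand-in for types.SystemdUnit with just the fields the check uses.
type SystemdUnit struct {
	Name    string
	Exists  bool
	Enabled bool
	Active  bool
}

// Same decision logic as unitRequiresUnit, reduced to returning the message id.
func requirementProblem(unit, requires SystemdUnit) string {
	switch {
	case (unit.Active || unit.Enabled) && !requires.Exists:
		return "sdUnitReqLoaded" // required unit is not even present
	case unit.Active && !requires.Active:
		return "sdUnitReqActive" // required unit is present but not running
	default:
		return "" // no problem
	}
}

func main() {
	node := SystemdUnit{Name: "openshift-node", Exists: true, Enabled: true, Active: true}
	docker := SystemdUnit{Name: "docker", Exists: true, Active: false}
	fmt.Println(requirementProblem(node, docker)) // sdUnitReqActive
}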
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,175 @@ |
| 0 |
+package types |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "github.com/golang/glog" |
|
| 5 |
+ "runtime" |
|
| 6 |
+ "strings" |
|
| 7 |
+ |
|
| 8 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 9 |
+) |
|
| 10 |
+ |
|
| 11 |
+type Diagnostic interface {
|
|
| 12 |
+ Name() string |
|
| 13 |
+ Description() string |
|
| 14 |
+ CanRun() (canRun bool, reason error) |
|
| 15 |
+ Check() *DiagnosticResult |
|
| 16 |
+} |
|
| 17 |
+ |
|
| 18 |
+type DiagnosticResult struct {
|
|
| 19 |
+ failure bool |
|
| 20 |
+ origin string // name of diagnostic; automatically inserted into log Entries |
|
| 21 |
+ logs []log.Entry |
|
| 22 |
+ warnings []DiagnosticError |
|
| 23 |
+ errors []DiagnosticError |
|
| 24 |
+} |
|
| 25 |
+ |
|
| 26 |
+func NewDiagnosticResult(origin string) *DiagnosticResult {
|
|
| 27 |
+ return &DiagnosticResult{origin: origin}
|
|
| 28 |
+} |
|
| 29 |
+ |
|
| 30 |
+func (r *DiagnosticResult) Complete() *DiagnosticResult {
|
|
| 31 |
+ if r.errors == nil {
|
|
| 32 |
+ r.errors = make([]DiagnosticError, 0) |
|
| 33 |
+ } |
|
| 34 |
+ if r.warnings == nil {
|
|
| 35 |
+ r.warnings = make([]DiagnosticError, 0) |
|
| 36 |
+ } |
|
| 37 |
+ if r.logs == nil {
|
|
| 38 |
+ r.logs = make([]log.Entry, 0) |
|
| 39 |
+ } |
|
| 40 |
+ return r |
|
| 41 |
+} |
|
| 42 |
+ |
|
| 43 |
+func (r *DiagnosticResult) appendLogs(stackDepth int, entries ...log.Entry) {


| 44 |
+ if r.logs == nil {


| 45 |
+ r.logs = make([]log.Entry, 0)

| 46 |
+ }

| 47 |
+ r.logs = append(r.logs, entries...)

| 48 |
+ // also glog immediately, for debugging when a diagnostic silently chokes

| 49 |
+ for _, entry := range entries {


| 50 |
+ if glog.V(glog.Level(6 - entry.Level.Level)) {


| 51 |
+ glog.InfoDepth(stackDepth, entry.Message.String())
|
| 52 |
+ } |
|
| 53 |
+ } |
|
| 54 |
+} |
|
| 55 |
+ |
|
| 56 |
+func (r *DiagnosticResult) Failure() bool {
|
|
| 57 |
+ return r.failure |
|
| 58 |
+} |
|
| 59 |
+ |
|
| 60 |
+func (r *DiagnosticResult) Logs() []log.Entry {
|
|
| 61 |
+ if r.logs == nil {
|
|
| 62 |
+ return make([]log.Entry, 0) |
|
| 63 |
+ } |
|
| 64 |
+ return r.logs |
|
| 65 |
+} |
|
| 66 |
+ |
|
| 67 |
+func (r *DiagnosticResult) appendWarnings(warn ...DiagnosticError) {
|
|
| 68 |
+ if r.warnings == nil {
|
|
| 69 |
+ r.warnings = make([]DiagnosticError, 0) |
|
| 70 |
+ } |
|
| 71 |
+ r.warnings = append(r.warnings, warn...) |
|
| 72 |
+} |
|
| 73 |
+ |
|
| 74 |
+func (r *DiagnosticResult) Warnings() []DiagnosticError {
|
|
| 75 |
+ if r.warnings == nil {
|
|
| 76 |
+ return make([]DiagnosticError, 0) |
|
| 77 |
+ } |
|
| 78 |
+ return r.warnings |
|
| 79 |
+} |
|
| 80 |
+ |
|
| 81 |
+func (r *DiagnosticResult) appendErrors(err ...DiagnosticError) {
|
|
| 82 |
+ if r.errors == nil {
|
|
| 83 |
+ r.errors = make([]DiagnosticError, 0) |
|
| 84 |
+ } |
|
| 85 |
+ r.failure = true |
|
| 86 |
+ r.errors = append(r.errors, err...) |
|
| 87 |
+} |
|
| 88 |
+ |
|
| 89 |
+func (r *DiagnosticResult) Errors() []DiagnosticError {
|
|
| 90 |
+ if r.errors == nil {
|
|
| 91 |
+ return make([]DiagnosticError, 0) |
|
| 92 |
+ } |
|
| 93 |
+ return r.errors |
|
| 94 |
+} |
|
| 95 |
+ |
|
| 96 |
+func (r *DiagnosticResult) Append(r2 *DiagnosticResult) {
|
|
| 97 |
+ r.Complete() |
|
| 98 |
+ r2.Complete() |
|
| 99 |
+ r.logs = append(r.logs, r2.logs...) |
|
| 100 |
+ r.warnings = append(r.warnings, r2.warnings...) |
|
| 101 |
+ r.errors = append(r.errors, r2.errors...) |
|
| 102 |
+ r.failure = r.failure || r2.failure |
|
| 103 |
+} |
|
| 104 |
+ |
|
| 105 |
+// basic ingress functions (private) |
|
| 106 |
+func (r *DiagnosticResult) caller(depth int) string {
|
|
| 107 |
+ if _, file, line, ok := runtime.Caller(depth + 1); ok {
|
|
| 108 |
+ paths := strings.SplitAfter(file, "github.com/") |
|
| 109 |
+ return fmt.Sprintf("diagnostic %s@%s:%d", r.origin, paths[len(paths)-1], line)
|
|
| 110 |
+ } |
|
| 111 |
+ return "diagnostic " + r.origin |
|
| 112 |
+} |
|
| 113 |
+func (r *DiagnosticResult) logError(id string, err error, msg *log.Message) {
|
|
| 114 |
+ r.appendLogs(2, log.Entry{id, r.caller(2), log.ErrorLevel, *msg})
|
|
| 115 |
+ if de, ok := err.(DiagnosticError); ok {
|
|
| 116 |
+ r.appendErrors(de) |
|
| 117 |
+ } else {
|
|
| 118 |
+ r.appendErrors(DiagnosticError{id, msg, err})
|
|
| 119 |
+ } |
|
| 120 |
+} |
|
| 121 |
+func (r *DiagnosticResult) logWarning(id string, err error, msg *log.Message) {
|
|
| 122 |
+ r.appendLogs(2, log.Entry{id, r.caller(2), log.WarnLevel, *msg})
|
|
| 123 |
+ if de, ok := err.(DiagnosticError); ok {
|
|
| 124 |
+ r.appendWarnings(de) |
|
| 125 |
+ } else {
|
|
| 126 |
+ r.appendWarnings(DiagnosticError{id, msg, err})
|
|
| 127 |
+ } |
|
| 128 |
+} |
|
| 129 |
+func (r *DiagnosticResult) logMessage(id string, level log.Level, msg *log.Message) {
|
|
| 130 |
+ r.appendLogs(2, log.Entry{id, r.caller(2), level, *msg})
|
|
| 131 |
+} |
|
| 132 |
+ |
|
| 133 |
+// Public ingress functions |
|
| 134 |
+// Errors are recorded as errors and also logged |
|
| 135 |
+func (r *DiagnosticResult) Error(id string, err error, text string) {
|
|
| 136 |
+ r.logError(id, err, &log.Message{id, "", nil, text})
|
|
| 137 |
+} |
|
| 138 |
+func (r *DiagnosticResult) Errorf(id string, err error, format string, a ...interface{}) {
|
|
| 139 |
+ r.logError(id, err, &log.Message{id, "", nil, fmt.Sprintf(format, a...)})
|
|
| 140 |
+} |
|
| 141 |
+func (r *DiagnosticResult) Errort(id string, err error, template string, data interface{} /* log.Hash */) {
|
|
| 142 |
+ r.logError(id, err, &log.Message{id, template, data, ""})
|
|
| 143 |
+} |
|
| 144 |
+ |
|
| 145 |
+// Warnings are recorded as warnings and also logged |
|
| 146 |
+func (r *DiagnosticResult) Warn(id string, err error, text string) {
|
|
| 147 |
+ r.logWarning(id, err, &log.Message{id, "", nil, text})
|
|
| 148 |
+} |
|
| 149 |
+func (r *DiagnosticResult) Warnf(id string, err error, format string, a ...interface{}) {
|
|
| 150 |
+ r.logWarning(id, err, &log.Message{id, "", nil, fmt.Sprintf(format, a...)})
|
|
| 151 |
+} |
|
| 152 |
+func (r *DiagnosticResult) Warnt(id string, err error, template string, data interface{} /* log.Hash */) {
|
|
| 153 |
+ r.logWarning(id, err, &log.Message{id, template, data, ""})
|
|
| 154 |
+} |
|
| 155 |
+ |
|
| 156 |
+// Info/Debug are just logged. |
|
| 157 |
+func (r *DiagnosticResult) Info(id string, text string) {
|
|
| 158 |
+ r.logMessage(id, log.InfoLevel, &log.Message{id, "", nil, text})
|
|
| 159 |
+} |
|
| 160 |
+func (r *DiagnosticResult) Infof(id string, format string, a ...interface{}) {
|
|
| 161 |
+ r.logMessage(id, log.InfoLevel, &log.Message{id, "", nil, fmt.Sprintf(format, a...)})
|
|
| 162 |
+} |
|
| 163 |
+func (r *DiagnosticResult) Infot(id string, template string, data interface{} /* log.Hash */) {
|
|
| 164 |
+ r.logMessage(id, log.InfoLevel, &log.Message{id, template, data, ""})
|
|
| 165 |
+} |
|
| 166 |
+func (r *DiagnosticResult) Debug(id string, text string) {
|
|
| 167 |
+ r.logMessage(id, log.DebugLevel, &log.Message{id, "", nil, text})
|
|
| 168 |
+} |
|
| 169 |
+func (r *DiagnosticResult) Debugf(id string, format string, a ...interface{}) {
|
|
| 170 |
+ r.logMessage(id, log.DebugLevel, &log.Message{id, "", nil, fmt.Sprintf(format, a...)})
|
|
| 171 |
+} |
|
| 172 |
+func (r *DiagnosticResult) Debugt(id string, template string, data interface{} /* log.Hash */) {
|
|
| 173 |
+ r.logMessage(id, log.DebugLevel, &log.Message{id, template, data, ""})
|
|
| 174 |
+} |
| 0 | 175 |
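
Note for anyone adding a diagnostic against this interface: a check records its logs, warnings, and errors on the returned DiagnosticResult rather than writing to a logger as it goes. A minimal sketch under that API (the diagnostic name, message IDs, and probe functions are hypothetical):

    package example

    import (
    	"github.com/openshift/origin/pkg/diagnostics/log"
    	"github.com/openshift/origin/pkg/diagnostics/types"
    )

    // ExampleDiag is a hypothetical diagnostic written against the new result API.
    type ExampleDiag struct{}

    func (d ExampleDiag) Name() string          { return "ExampleDiag" }
    func (d ExampleDiag) Description() string   { return "Demonstrates the result-object API" }
    func (d ExampleDiag) CanRun() (bool, error) { return true, nil }

    func (d ExampleDiag) Check() *types.DiagnosticResult {
    	r := types.NewDiagnosticResult("ExampleDiag")
    	r.Debug("exStart", "beginning example check")
    	if conditionLooksOff() { // placeholder probe
    		// recorded as a warning and logged; does not mark the result failed
    		r.Warnf("exWarn", nil, "condition looks off: %d", 42)
    	}
    	if conditionBroken() { // placeholder probe
    		// recorded as an error and logged; flips Failure() to true
    		r.Errort("exErr", nil, "unit {{.unit}} is broken", log.Hash{"unit": "docker"})
    	}
    	return r
    }

    func conditionLooksOff() bool { return false }
    func conditionBroken() bool   { return false }

A caller then inspects Failure(), Logs(), Warnings(), and Errors() on the result, as the integration test below does.
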
deleted file mode 100644 |
| ... | ... |
@@ -1,55 +0,0 @@ |
| 1 |
-package diagnostic |
|
| 2 |
- |
|
| 3 |
-// This needed to be separate from other types to avoid import cycle |
|
| 4 |
-// diagnostic -> discovery -> types |
|
| 5 |
- |
|
| 6 |
-import ( |
|
| 7 |
- "fmt" |
|
| 8 |
- |
|
| 9 |
- "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 10 |
-) |
|
| 11 |
- |
|
| 12 |
-type Diagnostic interface {
|
|
| 13 |
- Description() string |
|
| 14 |
- CanRun() (canRun bool, reason error) |
|
| 15 |
- Check() (success bool, info []log.Message, warnings []error, errors []error) |
|
| 16 |
-} |
|
| 17 |
- |
|
| 18 |
-type DiagnosticError struct {
|
|
| 19 |
- ID string |
|
| 20 |
- Explanation string |
|
| 21 |
- Cause error |
|
| 22 |
- |
|
| 23 |
- LogMessage *log.Message |
|
| 24 |
-} |
|
| 25 |
- |
|
| 26 |
-func NewDiagnosticError(id, explanation string, cause error) DiagnosticError {
|
|
| 27 |
- return DiagnosticError{id, explanation, cause, nil}
|
|
| 28 |
-} |
|
| 29 |
- |
|
| 30 |
-func NewDiagnosticErrorFromTemplate(id, template string, templateData interface{}) DiagnosticError {
|
|
| 31 |
- return DiagnosticError{id, "", nil,
|
|
| 32 |
- &log.Message{
|
|
| 33 |
- ID: id, |
|
| 34 |
- Template: template, |
|
| 35 |
- TemplateData: templateData, |
|
| 36 |
- }, |
|
| 37 |
- } |
|
| 38 |
-} |
|
| 39 |
- |
|
| 40 |
-func (e DiagnosticError) Error() string {
|
|
| 41 |
- if e.Cause != nil {
|
|
| 42 |
- return e.Cause.Error() |
|
| 43 |
- } |
|
| 44 |
- |
|
| 45 |
- if e.LogMessage != nil {
|
|
| 46 |
- return fmt.Sprintf("%v", e.LogMessage)
|
|
| 47 |
- } |
|
| 48 |
- |
|
| 49 |
- return e.Explanation |
|
| 50 |
-} |
|
| 51 |
- |
|
| 52 |
-func IsDiagnosticError(e error) bool {
|
|
| 53 |
- _, ok := e.(DiagnosticError) |
|
| 54 |
- return ok |
|
| 55 |
-} |
| 56 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,36 @@ |
| 0 |
+package types |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ |
|
| 5 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
| 6 |
+) |
|
| 7 |
+ |
|
| 8 |
+type DiagnosticError struct {
|
|
| 9 |
+ ID string |
|
| 10 |
+ LogMessage *log.Message |
|
| 11 |
+ Cause error |
|
| 12 |
+} |
|
| 13 |
+ |
|
| 14 |
+func (e DiagnosticError) Error() string {
|
|
| 15 |
+ if e.LogMessage != nil {
|
|
| 16 |
+ return fmt.Sprintf("%v", e.LogMessage)
|
|
| 17 |
+ } |
|
| 18 |
+ if e.Cause != nil {
|
|
| 19 |
+ return e.Cause.Error() |
|
| 20 |
+ } |
|
| 21 |
+ return e.ID |
|
| 22 |
+} |
|
| 23 |
+ |
|
| 24 |
+func IsDiagnosticError(e error) bool {
|
|
| 25 |
+ _, ok := e.(DiagnosticError) |
|
| 26 |
+ return ok |
|
| 27 |
+} |
|
| 28 |
+ |
|
| 29 |
+// MatchesDiagError reports whether err is a DiagnosticError with the given ID.
|
| 30 |
+func MatchesDiagError(err error, id string) bool {
|
|
| 31 |
+ if derr, ok := err.(DiagnosticError); ok && derr.ID == id {
|
|
| 32 |
+ return true |
|
| 33 |
+ } |
|
| 34 |
+ return false |
|
| 35 |
+} |
| 0 | 36 |
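
A quick sketch of how the slimmed-down error type resolves its message — preferring LogMessage, then Cause, then falling back to the ID (the ID and values here are illustrative, using only the API defined above):

    package main

    import (
    	"errors"
    	"fmt"

    	"github.com/openshift/origin/pkg/diagnostics/types"
    )

    func main() {
    	// no LogMessage or Cause set, so Error() falls back to the ID
    	err := types.DiagnosticError{ID: "clNoAvailNodes"}
    	fmt.Println(err.Error())                                   // "clNoAvailNodes"
    	fmt.Println(types.IsDiagnosticError(err))                  // true
    	fmt.Println(types.MatchesDiagError(err, "clNoAvailNodes")) // true
    	fmt.Println(types.MatchesDiagError(errors.New("boom"), "clNoAvailNodes")) // false: not a DiagnosticError
    }
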
deleted file mode 100644 |
| ... | ... |
@@ -1,38 +0,0 @@ |
| 1 |
-package types |
|
| 2 |
- |
|
| 3 |
-import "fmt" |
|
| 4 |
- |
|
| 5 |
-type Version struct {
|
|
| 6 |
- X, Y, Z int |
|
| 7 |
-} |
|
| 8 |
- |
|
| 9 |
-func (a Version) Eq(b Version) bool {
|
|
| 10 |
- return a.X == b.X && a.Y == b.Y && a.Z == b.Z |
|
| 11 |
-} |
|
| 12 |
- |
|
| 13 |
-func (a Version) Gt(b Version) bool {
|
|
| 14 |
- if a.X > b.X {
|
|
| 15 |
- return true |
|
| 16 |
- } |
|
| 17 |
- if a.X < b.X {
|
|
| 18 |
- return false |
|
| 19 |
- } // so, Xs are equal |
|
| 20 |
- if a.Y > b.Y {
|
|
| 21 |
- return true |
|
| 22 |
- } |
|
| 23 |
- if a.Y < b.Y {
|
|
| 24 |
- return false |
|
| 25 |
- } // so, Ys are equal |
|
| 26 |
- if a.Z > b.Z {
|
|
| 27 |
- return true |
|
| 28 |
- } |
|
| 29 |
- return false |
|
| 30 |
-} |
|
| 31 |
- |
|
| 32 |
-func (v Version) GoString() string {
|
|
| 33 |
- return fmt.Sprintf("%d.%d.%d", v.X, v.Y, v.Z)
|
|
| 34 |
-} |
|
| 35 |
- |
|
| 36 |
-func (v Version) NonZero() bool {
|
|
| 37 |
- return !v.Eq(Version{0, 0, 0})
|
|
| 38 |
-} |
| ... | ... |
@@ -4392,6 +4392,36 @@ _openshift_ex_build-chain() |
| 4392 | 4392 |
must_have_one_noun=() |
| 4393 | 4393 |
} |
| 4394 | 4394 |
|
| 4395 |
+_openshift_ex_diagnostics() |
|
| 4396 |
+{
|
|
| 4397 |
+ last_command="openshift_ex_diagnostics" |
|
| 4398 |
+ commands=() |
|
| 4399 |
+ |
|
| 4400 |
+ flags=() |
|
| 4401 |
+ two_word_flags=() |
|
| 4402 |
+ flags_with_completion=() |
|
| 4403 |
+ flags_completion=() |
|
| 4404 |
+ |
|
| 4405 |
+ flags+=("--cluster-context=")
|
|
| 4406 |
+ flags+=("--config=")
|
|
| 4407 |
+ flags+=("--context=")
|
|
| 4408 |
+ flags+=("--diaglevel=")
|
|
| 4409 |
+ two_word_flags+=("-l")
|
|
| 4410 |
+ flags+=("--diagnostics=")
|
|
| 4411 |
+ two_word_flags+=("-d")
|
|
| 4412 |
+ flags+=("--help")
|
|
| 4413 |
+ flags+=("-h")
|
|
| 4414 |
+ flags+=("--host")
|
|
| 4415 |
+ flags+=("--loglevel=")
|
|
| 4416 |
+ flags+=("--master-config=")
|
|
| 4417 |
+ flags+=("--node-config=")
|
|
| 4418 |
+ flags+=("--output=")
|
|
| 4419 |
+ two_word_flags+=("-o")
|
|
| 4420 |
+ |
|
| 4421 |
+ must_have_one_flag=() |
|
| 4422 |
+ must_have_one_noun=() |
|
| 4423 |
+} |
|
| 4424 |
+ |
|
| 4395 | 4425 |
_openshift_ex_options() |
| 4396 | 4426 |
{
|
| 4397 | 4427 |
last_command="openshift_ex_options" |
| ... | ... |
@@ -4416,6 +4446,7 @@ _openshift_ex() |
| 4416 | 4416 |
commands+=("tokens")
|
| 4417 | 4417 |
commands+=("ipfailover")
|
| 4418 | 4418 |
commands+=("build-chain")
|
| 4419 |
+ commands+=("diagnostics")
|
|
| 4419 | 4420 |
commands+=("options")
|
| 4420 | 4421 |
|
| 4421 | 4422 |
flags=() |
| 4422 | 4423 |
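
With these completion entries, the new subcommand and its flags should be invocable along these lines; the short flags -d and -l pair with --diagnostics and --diaglevel per the two_word_flags entries above, and the placeholder values are illustrative:

    $ openshift ex diagnostics
    $ openshift ex diagnostics --diagnostics=<name> --diaglevel=<level>
    $ openshift ex diagnostics -d <name> -l <level>
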
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,73 @@ |
| 0 |
+// +build integration,!no-etcd |
|
| 1 |
+ |
|
| 2 |
+package integration |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "testing" |
|
| 6 |
+ |
|
| 7 |
+ kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
|
| 8 |
+ |
|
| 9 |
+ clusterdiags "github.com/openshift/origin/pkg/diagnostics/cluster" |
|
| 10 |
+ diagtype "github.com/openshift/origin/pkg/diagnostics/types" |
|
| 11 |
+ testutil "github.com/openshift/origin/test/util" |
|
| 12 |
+) |
|
| 13 |
+ |
|
| 14 |
+func TestDiagNodeConditions(t *testing.T) {
|
|
| 15 |
+ //masterConfig, clientFile, err := testutil.StartTestAllInOne() |
|
| 16 |
+ _, clientFile, err := testutil.StartTestMaster() |
|
| 17 |
+ if err != nil {
|
|
| 18 |
+ t.Fatalf("unexpected error: %v", err)
|
|
| 19 |
+ } |
|
| 20 |
+ client, err := testutil.GetClusterAdminKubeClient(clientFile) |
|
| 21 |
+ if err != nil {
|
|
| 22 |
+ t.Fatalf("unexpected error: %v", err)
|
|
| 23 |
+ } |
|
| 24 |
+ |
|
| 25 |
+ nodeDiag := clusterdiags.NodeDefinitions{KubeClient: client}
|
|
| 26 |
+ // First check with no nodes defined; should get an error about that. |
|
| 27 |
+ // ok, logs, warnings, errors := nodeDiag.Check() |
|
| 28 |
+ if errors := nodeDiag.Check().Errors(); len(errors) != 1 || |
|
| 29 |
+ !diagtype.MatchesDiagError(errors[0], "clNoAvailNodes") {
|
|
| 30 |
+ t.Errorf("expected 1 error about not having nodes, not: %#v", errors)
|
|
| 31 |
+ } |
|
| 32 |
+ |
|
| 33 |
+ // Next create a node and leave it in NotReady state. Should get a warning |
|
| 34 |
+ // about that, plus the previous error as there are still no nodes available. |
|
| 35 |
+ node, err := client.Nodes().Create(&kapi.Node{ObjectMeta: kapi.ObjectMeta{Name: "test-node"}})
|
|
| 36 |
+ if err != nil {
|
|
| 37 |
+ t.Fatalf("expected no errors creating a node: %#v", err)
|
|
| 38 |
+ } |
|
| 39 |
+ result := nodeDiag.Check() |
|
| 40 |
+ if errors := result.Errors(); len(errors) != 1 || |
|
| 41 |
+ !diagtype.MatchesDiagError(errors[0], "clNoAvailNodes") {
|
|
| 42 |
+ t.Fatalf("expected 1 error about not having nodes, not: %#v", errors)
|
|
| 43 |
+ } else if warnings := result.Warnings(); len(warnings) < 1 || !diagtype.MatchesDiagError(warnings[0], "clNodeNotReady") {
|
|
| 44 |
+ t.Fatalf("expected a warning about test-node not being ready, not: %#v", warnings)
|
|
| 45 |
+ } |
|
| 46 |
+ |
|
| 47 |
+ _ = node |
|
| 48 |
+ /* |
|
| 49 |
+ // Put the new node in Ready state and verify the diagnostic is clean |
|
| 50 |
+ if _, err := client.Nodes().UpdateStatus(node); err != nil {
|
|
| 51 |
+ t.Fatalf("expected no errors updating node status, but: %#v", err)
|
|
| 52 |
+ } |
|
| 53 |
+ result = nodeDiag.Check() |
|
| 54 |
+ if warnings := result.Warnings(); len(warnings) > 0 {
|
|
| 55 |
+ t.Fatalf("expected no warning with one node ready, but: %#v", warnings)
|
|
| 56 |
+ } else if errors := result.Errors(); len(errors) > 0 {
|
|
| 57 |
+ t.Fatalf("expected no errors with one node ready, but: %#v", errors)
|
|
| 58 |
+ } |
|
| 59 |
+ |
|
| 60 |
+ // Make the node unschedulable and verify diagnostics notices |
|
| 61 |
+ node.Spec.Unschedulable = true |
|
| 62 |
+ if _, err := client.Nodes().Update(node); err != nil {
|
|
| 63 |
+ t.Fatalf("expected no errors making node unschedulable, but: %#v", err)
|
|
| 64 |
+ } |
|
| 65 |
+ if errors := result.Errors(); len(errors) != 1 || |
|
| 66 |
+ !diagtype.MatchesDiagError(errors[0], "clNoAvailNodes") {
|
|
| 67 |
+ t.Fatalf("expected 1 error about not having nodes, but: %#v", errors)
|
|
| 68 |
+ } else if warnings := result.Warnings(); len(warnings) < 1 || !diagtype.MatchesDiagError(warnings[0], "clNodeNotSched") {
|
|
| 69 |
+ t.Fatalf("expected a warning about test-node not being schedulable, but: %#v", warnings)
|
|
| 70 |
+ } |
|
| 71 |
+ */ |
|
| 72 |
+} |
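
Given the build tag at the top of this file, the test should run under something like the following (the exact invocation may differ from the repo's usual integration-test targets):

    $ go test -tags integration ./test/integration -run TestDiagNodeConditions
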