
introduce `openshift ex diagnostics`

Luke Meyer authored on 2015/04/04 12:59:11
Showing 27 changed files
@@ -10,7 +10,6 @@
 /examples/sample-app/logs/openshift.log
 *.swp
 .vimrc
-.kubeconfig
 .vagrant-openshift.json*
 .DS_Store
 .idea
new file mode 100644
@@ -0,0 +1,194 @@
+package cmd
+
+import (
+	"fmt"
+	"github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
+	"github.com/openshift/origin/pkg/cmd/server/start"
+	"github.com/openshift/origin/pkg/cmd/templates"
+	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
+	"github.com/openshift/origin/pkg/diagnostics/run"
+	"github.com/spf13/cobra"
+	"io"
+)
+
+const longAllDescription = `
+OpenShift Diagnostics
+
+This command helps you understand and troubleshoot OpenShift. It is
+intended to be run from the same context as an OpenShift client or running
+master / node in order to troubleshoot from the perspective of each.
+
+    $ %[1]s
+
+If run without flags or subcommands, it will check for config files for
+client, master, and node, and if found, use them for troubleshooting
+those components. If master/node config files are not found, the tool
+assumes they are not present and does diagnostics only as a client.
+
+You may also specify config files explicitly with flags below, in which
+case you will receive an error if they are invalid or not found.
+
+    $ %[1]s --master-config=/etc/openshift/master/master-config.yaml
+
+Subcommands may be used to scope the troubleshooting to a particular
+component and are not limited to using config files; you can and should
+use the same flags that are actually set on the command line for that
+component to configure the diagnostic.
+
+    $ %[1]s node --hostname='node.example.com' --kubeconfig=...
+
+NOTE: This is an alpha version of diagnostics and will change significantly.
+NOTE: Global flags (from the 'options' subcommand) are ignored here but
+can be used with subcommands.
+`
+
+func NewCommandDiagnostics(name string, fullName string, out io.Writer) *cobra.Command {
+	opts := options.NewAllDiagnosticsOptions(out)
+	cmd := &cobra.Command{
+		Use:   name,
+		Short: "This utility helps you understand and troubleshoot OpenShift v3.",
+		Long:  fmt.Sprintf(longAllDescription, fullName),
+		Run: func(c *cobra.Command, args []string) {
+			opts.GlobalFlags = c.PersistentFlags()
+			run.Diagnose(opts)
+		},
+	}
+	cmd.SetOutput(out) // for output re: usage / help
+	opts.BindFlags(cmd.Flags(), options.NewAllDiagnosticsFlagInfos())
+	// Although we reuse DiagOptions across all commands, we do not want the flags buried
+	// in the "global" flags, so we add them locally at each command.
+	opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos())
+
+	/*
+	   This command needs the client factory built in the "client" subcommand.
+	   Generating the factory adds flags to the "client" cmd, and we do not want
+	   to add those flags to this command (the only client option here is a config
+	   file). So the factory object from client cmd is reused for this command.
+	*/
+	clientCmd, factory := NewClientCommand("client", name+" client", out)
+	opts.ClientDiagOptions.Factory = factory
+
+	cmd.AddCommand(clientCmd)
+	cmd.AddCommand(NewMasterCommand("master", name+" master", out))
+	cmd.AddCommand(NewNodeCommand("node", name+" node", out))
+	cmd.AddCommand(NewOptionsCommand())
+
+	return cmd
+}
+
+const longClientDescription = `
+OpenShift Diagnostics
+
+This command helps you understand and troubleshoot OpenShift as a user. It is
+intended to be run from the same context as an OpenShift client
+("openshift cli" or "osc") and with the same configuration options.
+
+    $ %s
+`
+
+func NewClientCommand(name string, fullName string, out io.Writer) (*cobra.Command, *osclientcmd.Factory) {
+	opts := options.NewClientDiagnosticsOptions(out, nil)
+	cmd := &cobra.Command{
+		Use:   name,
+		Short: "Troubleshoot using the OpenShift v3 client.",
+		Long:  fmt.Sprintf(longClientDescription, fullName),
+		Run: func(c *cobra.Command, args []string) {
+			run.Diagnose(&options.AllDiagnosticsOptions{
+				ClientDiagOptions: opts,
+				DiagOptions:       opts.DiagOptions,
+				GlobalFlags:       c.PersistentFlags(),
+			})
+		},
+	}
+	cmd.SetOutput(out) // for output re: usage / help
+	opts.MustCheck = true
+	opts.Factory = osclientcmd.New(cmd.PersistentFlags()) // side effect: add standard persistent flags for openshift client
+	opts.BindFlags(cmd.Flags(), options.NewClientDiagnosticsFlagInfos())
+	opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos())
+
+	cmd.AddCommand(NewOptionsCommand())
+	return cmd, opts.Factory
+}
+
+const longMasterDescription = `
+OpenShift Diagnostics
+
+This command helps you understand and troubleshoot a running OpenShift
+master. It is intended to be run from the same context as the master
+(where "openshift start" or "openshift start master" is run, possibly from
+systemd or inside a container) and with the same configuration options.
+
+    $ %s
+`
+
+func NewMasterCommand(name string, fullName string, out io.Writer) *cobra.Command {
+	opts := options.NewMasterDiagnosticsOptions(out, nil)
+	cmd := &cobra.Command{
+		Use:   name,
+		Short: "Troubleshoot an OpenShift v3 master.",
+		Long:  fmt.Sprintf(longMasterDescription, fullName),
+		Run: func(c *cobra.Command, args []string) {
+			run.Diagnose(&options.AllDiagnosticsOptions{
+				MasterDiagOptions: opts,
+				DiagOptions:       opts.DiagOptions,
+				GlobalFlags:       c.PersistentFlags(),
+			})
+		},
+	}
+	cmd.SetOutput(out) // for output re: usage / help
+	opts.MustCheck = true
+	opts.MasterStartOptions = &start.MasterOptions{MasterArgs: start.MasterArgsAndFlags(cmd.Flags())}
+	opts.BindFlags(cmd.Flags(), options.NewMasterDiagnosticsFlagInfos())
+	opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos())
+
+	cmd.AddCommand(NewOptionsCommand())
+	return cmd
+}
+
+const longNodeDescription = `
+OpenShift Diagnostics
+
+This command helps you understand and troubleshoot a running OpenShift
+node. It is intended to be run from the same context as the node
+(where "openshift start" or "openshift start node" is run, possibly from
+systemd or inside a container) and with the same configuration options.
+
+    $ %s
+`
+
+func NewNodeCommand(name string, fullName string, out io.Writer) *cobra.Command {
+	opts := options.NewNodeDiagnosticsOptions(out, nil)
+	cmd := &cobra.Command{
+		Use:   name,
+		Short: "Troubleshoot an OpenShift v3 node.",
+		Long:  fmt.Sprintf(longNodeDescription, fullName),
+		Run: func(c *cobra.Command, args []string) {
+			run.Diagnose(&options.AllDiagnosticsOptions{
+				NodeDiagOptions: opts,
+				DiagOptions:     opts.DiagOptions,
+				GlobalFlags:     c.PersistentFlags(),
+			})
+		},
+	}
+	cmd.SetOutput(out) // for output re: usage / help
+	opts.MustCheck = true
+	opts.NodeStartOptions = &start.NodeOptions{NodeArgs: start.NodeArgsAndFlags(cmd.Flags())}
+	opts.BindFlags(cmd.Flags(), options.NewNodeDiagnosticsFlagInfos())
+	opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos())
+
+	cmd.AddCommand(NewOptionsCommand())
+	return cmd
+}
+
+func NewOptionsCommand() *cobra.Command {
+	cmd := &cobra.Command{
+		Use: "options",
+		Run: func(cmd *cobra.Command, args []string) {
+			cmd.Usage()
+		},
+	}
+
+	templates.UseOptionsTemplates(cmd)
+
+	return cmd
+}
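
The commands above all follow the same cobra shape: a parent command whose Run covers the "check everything" case, plus scoped subcommands that bind their own flags against shared option structs. A minimal, self-contained sketch of that shape (the command and flag names here are illustrative, not from this commit):

    package main

    import (
        "fmt"
        "os"

        "github.com/spf13/cobra"
    )

    func main() {
        var configPath string // shared option, standing in for the AllDiagnosticsOptions fields

        root := &cobra.Command{
            Use:   "diagtool",
            Short: "Runs all checks when invoked without a subcommand",
            Run: func(c *cobra.Command, args []string) {
                fmt.Println("running all diagnostics; config:", configPath)
            },
        }
        root.Flags().StringVar(&configPath, "config", "", "path to a config file")

        // A component-scoped subcommand, analogous to NewMasterCommand/NewNodeCommand.
        master := &cobra.Command{
            Use:   "master",
            Short: "Runs only master-side checks",
            Run: func(c *cobra.Command, args []string) {
                fmt.Println("running master diagnostics")
            },
        }
        root.AddCommand(master)

        if err := root.Execute(); err != nil {
            os.Exit(1)
        }
    }

Invoked as `diagtool` it runs everything; as `diagtool master` it runs only the master checks, mirroring how `openshift ex diagnostics [client|master|node]` is scoped above.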
new file mode 100644
@@ -0,0 +1,53 @@
+package options
+
+import (
+	"github.com/spf13/pflag"
+	"io"
+)
+
+// user options for openshift-diagnostics main command
+type AllDiagnosticsOptions struct {
+	DiagOptions       *DiagnosticsOptions
+	ClientDiagOptions *ClientDiagnosticsOptions
+	MasterDiagOptions *MasterDiagnosticsOptions
+	NodeDiagOptions   *NodeDiagnosticsOptions
+	ClientConfigPath  string
+	MasterConfigPath  string
+	NodeConfigPath    string
+
+	// there are cases where discovery has to look up flags created indirectly
+	GlobalFlags *pflag.FlagSet
+}
+
+// definitions used to bind the options to actual flags on a command
+type AllDiagnosticsFlagInfos struct {
+	ClientConfigPath FlagInfo
+	MasterConfigPath FlagInfo
+	NodeConfigPath   FlagInfo
+}
+
+func NewAllDiagnosticsOptions(out io.Writer) *AllDiagnosticsOptions {
+	common := NewDiagnosticsOptions(out)
+
+	return &AllDiagnosticsOptions{
+		DiagOptions:       common,
+		ClientDiagOptions: NewClientDiagnosticsOptions(nil, common),
+		MasterDiagOptions: NewMasterDiagnosticsOptions(nil, common),
+		NodeDiagOptions:   NewNodeDiagnosticsOptions(nil, common),
+	}
+}
+
+// default overrideable flag specifications to be bound to options.
+func NewAllDiagnosticsFlagInfos() *AllDiagnosticsFlagInfos {
+	return &AllDiagnosticsFlagInfos{
+		ClientConfigPath: FlagInfo{FlagAllClientConfigName, "", "", "Path to the client config file."},
+		MasterConfigPath: FlagInfo{FlagAllMasterConfigName, "", "", "Path to the master config file."},
+		NodeConfigPath:   FlagInfo{FlagAllNodeConfigName, "", "", "Path to the node config file."},
+	}
+}
+
+func (o *AllDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *AllDiagnosticsFlagInfos) {
+	flagInfos.ClientConfigPath.BindStringFlag(cmdFlags, &o.ClientConfigPath)
+	flagInfos.MasterConfigPath.BindStringFlag(cmdFlags, &o.MasterConfigPath)
+	flagInfos.NodeConfigPath.BindStringFlag(cmdFlags, &o.NodeConfigPath)
+}
new file mode 100644
@@ -0,0 +1,46 @@
+package options
+
+import (
+	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
+	"github.com/spf13/pflag"
+	"io"
+)
+
+// user options for openshift-diagnostics client command
+type ClientDiagnosticsOptions struct {
+	DiagOptions *DiagnosticsOptions
+	Factory     *osclientcmd.Factory
+	MustCheck   bool // set for "diagnostics client" which requires diagnosing the client even if there is no config file
+	// Turns out we don't need to add any flags... YET
+}
+
+// definitions used to bind the options to actual flags on a command
+type ClientDiagnosticsFlagInfos struct {
+	// don't need yet...
+	//Something   FlagInfo
+}
+
+// supply output writer or pre-created DiagnosticsOptions
+func NewClientDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *ClientDiagnosticsOptions {
+	if opts != nil {
+		return &ClientDiagnosticsOptions{
+			DiagOptions: opts,
+		}
+	} else if out != nil {
+		return &ClientDiagnosticsOptions{
+			DiagOptions: NewDiagnosticsOptions(out),
+		}
+	}
+	return nil
+}
+
+// default overrideable flag specifications to be bound to options.
+func NewClientDiagnosticsFlagInfos() *ClientDiagnosticsFlagInfos {
+	return &ClientDiagnosticsFlagInfos{
+	//NodeConfigPath:   FlagInfo{"node-config", "", "", "Path to the node config file."},
+	}
+}
+
+func (o *ClientDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *ClientDiagnosticsFlagInfos) {
+	//flagInfos.Something.BindStringFlag(cmdFlags, &o.Something)
+}
new file mode 100644
@@ -0,0 +1,46 @@
+package options
+
+import (
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+	cmdutil "github.com/openshift/origin/pkg/cmd/util"
+	"github.com/spf13/pflag"
+	"io"
+)
+
+// all of the diagnostics commands will bind these options
+type DiagnosticsOptions struct {
+	Diagnostics *util.StringList // named diagnostics to run
+	DiagLevel   int              // show output of this priority or higher
+	DiagFormat  string           // format of output - text/json/yaml
+
+	Output cmdutil.Output // this is used for discovery and diagnostic output
+}
+
+func NewDiagnosticsOptions(out io.Writer) *DiagnosticsOptions {
+	return &DiagnosticsOptions{
+		Diagnostics: &util.StringList{}, // have to instantiate in order to bind flag
+		Output:      cmdutil.Output{out},
+	}
+}
+
+// definitions used to bind the options to actual flags on a command
+type DiagnosticsFlagInfos struct {
+	Diagnostics FlagInfo
+	DiagLevel   FlagInfo
+	DiagFormat  FlagInfo
+}
+
+// default overrideable flag specifications to be bound to options.
+func NewDiagnosticsFlagInfos() *DiagnosticsFlagInfos {
+	return &DiagnosticsFlagInfos{
+		Diagnostics: FlagInfo{FlagDiagnosticsName, "d", "", `comma-separated list of diagnostic names to run, e.g. "systemd.AnalyzeLogs"`},
+		DiagLevel:   FlagInfo{FlagLevelName, "l", "3", "Level of diagnostic output: 0: Error, 1: Warn, 2: Notice, 3: Info, 4: Debug"},
+		DiagFormat:  FlagInfo{FlagFormatName, "o", "text", "Output format: text|json|yaml"},
+	}
+}
+
+func (o *DiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *DiagnosticsFlagInfos) {
+	flagInfos.Diagnostics.BindListFlag(cmdFlags, o.Diagnostics)
+	flagInfos.DiagLevel.BindIntFlag(cmdFlags, &o.DiagLevel)
+	flagInfos.DiagFormat.BindStringFlag(cmdFlags, &o.DiagFormat)
+}
new file mode 100644
@@ -0,0 +1,57 @@
+package options
+
+import (
+	kclientcmd "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd"
+	kutil "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+	"github.com/spf13/pflag"
+	"strconv"
+)
+
+type FlagInfo kclientcmd.FlagInfo // reuse to add methods
+
+// FlagInfos serve as a customizable intermediary between the command flags and
+// the options object they feed into. This enables reuse of the flags and options
+// with tweaked definitions in different contexts if necessary.
+
+func (i FlagInfo) BindStringFlag(flags *pflag.FlagSet, target *string) {
+	// assume flags with no longname are not desired
+	if len(i.LongName) > 0 {
+		flags.StringVarP(target, i.LongName, i.ShortName, i.Default, i.Description)
+	}
+}
+
+func (i FlagInfo) BindIntFlag(flags *pflag.FlagSet, target *int) {
+	// assume flags with no longname are not desired
+	if len(i.LongName) > 0 {
+		// try to parse Default as an int.  If it fails, assume 0
+		intVal, _ := strconv.ParseInt(i.Default, 10, 0)
+		flags.IntVarP(target, i.LongName, i.ShortName, int(intVal), i.Description)
+	}
+}
+
+func (i FlagInfo) BindBoolFlag(flags *pflag.FlagSet, target *bool) {
+	// assume flags with no longname are not desired
+	if len(i.LongName) > 0 {
+		// try to parse Default as a bool.  If it fails, assume false
+		boolVal, _ := strconv.ParseBool(i.Default)
+		flags.BoolVarP(target, i.LongName, i.ShortName, boolVal, i.Description)
+	}
+}
+
+func (i FlagInfo) BindListFlag(flags *pflag.FlagSet, target *kutil.StringList) {
+	// assume flags with no longname are not desired
+	if len(i.LongName) > 0 {
+		flags.VarP(target, i.LongName, i.ShortName, i.Description)
+	}
+}
+
+const (
+	FlagAllClientConfigName = "client-config"
+	FlagAllMasterConfigName = "master-config"
+	FlagAllNodeConfigName   = "node-config"
+	FlagDiagnosticsName     = "diagnostics"
+	FlagLevelName           = "diaglevel"
+	FlagFormatName          = "output"
+	FlagMasterConfigName    = "config"
+	FlagNodeConfigName      = "config"
+)
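
The binding methods above let one FlagInfo definition be attached to different flag sets and different target fields. A rough standalone equivalent, with the FlagInfo struct re-declared locally for illustration (the real type is borrowed from kclientcmd):

    package main

    import (
        "fmt"

        "github.com/spf13/pflag"
    )

    // Local stand-in for the FlagInfo type above: a reusable flag
    // definition that can be bound to different targets per command.
    type FlagInfo struct {
        LongName    string
        ShortName   string
        Default     string
        Description string
    }

    func (i FlagInfo) BindStringFlag(flags *pflag.FlagSet, target *string) {
        // as above, flags with no long name are assumed unwanted
        if len(i.LongName) > 0 {
            flags.StringVarP(target, i.LongName, i.ShortName, i.Default, i.Description)
        }
    }

    func main() {
        var format string
        fs := pflag.NewFlagSet("diag", pflag.ContinueOnError)

        // One definition, bound to a concrete target; another command could
        // rebind the same definition with a tweaked default or description.
        info := FlagInfo{LongName: "output", ShortName: "o", Default: "text", Description: "Output format: text|json|yaml"}
        info.BindStringFlag(fs, &format)

        if err := fs.Parse([]string{"--output=json"}); err == nil {
            fmt.Println("format:", format) // prints: format: json
        }
    }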
new file mode 100644
@@ -0,0 +1,45 @@
+package options
+
+import (
+	"github.com/openshift/origin/pkg/cmd/server/start"
+	"github.com/spf13/pflag"
+	"io"
+)
+
+// user options for openshift-diagnostics master command
+type MasterDiagnosticsOptions struct {
+	DiagOptions *DiagnosticsOptions
+	MustCheck   bool // set for "diagnostics master" which requires diagnosing master even if there is no config file
+	// reuse the master options from "openshift start master"
+	MasterStartOptions *start.MasterOptions
+}
+
+// definitions used to bind the options to actual flags on a command
+type MasterDiagnosticsFlagInfos struct {
+	ConfigFile FlagInfo
+}
+
+// supply output writer or pre-created DiagnosticsOptions
+func NewMasterDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *MasterDiagnosticsOptions {
+	if opts != nil {
+		return &MasterDiagnosticsOptions{
+			DiagOptions: opts,
+		}
+	} else if out != nil {
+		return &MasterDiagnosticsOptions{
+			DiagOptions: NewDiagnosticsOptions(out),
+		}
+	}
+	return nil
+}
+
+// default overrideable flag specifications to be bound to options.
+func NewMasterDiagnosticsFlagInfos() *MasterDiagnosticsFlagInfos {
+	return &MasterDiagnosticsFlagInfos{
+		ConfigFile: FlagInfo{FlagMasterConfigName, "", "", "Location of the master configuration file to run from. When running from a configuration file, all other command-line arguments are ignored."},
+	}
+}
+
+func (o *MasterDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *MasterDiagnosticsFlagInfos) {
+	flagInfos.ConfigFile.BindStringFlag(cmdFlags, &o.MasterStartOptions.ConfigFile)
+}
new file mode 100644
@@ -0,0 +1,45 @@
+package options
+
+import (
+	"github.com/openshift/origin/pkg/cmd/server/start"
+	"github.com/spf13/pflag"
+	"io"
+)
+
+// user options for openshift-diagnostics node command
+type NodeDiagnosticsOptions struct {
+	DiagOptions *DiagnosticsOptions
+	MustCheck   bool // set for "diagnostics node" which requires diagnosing node even if there is no config file
+	// reuse the node options from "openshift start node"
+	NodeStartOptions *start.NodeOptions
+}
+
+// definitions used to bind the options to actual flags on a command
+type NodeDiagnosticsFlagInfos struct {
+	ConfigFile FlagInfo
+}
+
+// supply output writer or pre-created DiagnosticsOptions
+func NewNodeDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *NodeDiagnosticsOptions {
+	if opts != nil {
+		return &NodeDiagnosticsOptions{
+			DiagOptions: opts,
+		}
+	} else if out != nil {
+		return &NodeDiagnosticsOptions{
+			DiagOptions: NewDiagnosticsOptions(out),
+		}
+	}
+	return nil
+}
+
+// default overrideable flag specifications to be bound to options.
+func NewNodeDiagnosticsFlagInfos() *NodeDiagnosticsFlagInfos {
+	return &NodeDiagnosticsFlagInfos{
+		ConfigFile: FlagInfo{FlagNodeConfigName, "", "", "Location of the node configuration file to run from. When running from a configuration file, all other command-line arguments are ignored."},
+	}
+}
+
+func (o *NodeDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *NodeDiagnosticsFlagInfos) {
+	flagInfos.ConfigFile.BindStringFlag(cmdFlags, &o.NodeStartOptions.ConfigFile)
+}
@@ -12,6 +12,7 @@ import (
 	"github.com/openshift/origin/pkg/cmd/cli"
 	"github.com/openshift/origin/pkg/cmd/cli/cmd"
 	"github.com/openshift/origin/pkg/cmd/experimental/buildchain"
+	diagnostics "github.com/openshift/origin/pkg/cmd/experimental/diagnostics"
 	exipfailover "github.com/openshift/origin/pkg/cmd/experimental/ipfailover"
 	"github.com/openshift/origin/pkg/cmd/experimental/tokens"
 	"github.com/openshift/origin/pkg/cmd/flagtypes"
@@ -52,6 +53,8 @@ func CommandFor(basename string) *cobra.Command {
 		cmd = irouter.NewCommandRouter(basename)
 	case "openshift-deploy":
 		cmd = deployer.NewCommandDeployer(basename)
+	case "openshift-diagnostics":
+		cmd = diagnostics.NewCommandDiagnostics(basename, basename, os.Stdout)
 	case "openshift-sti-build":
 		cmd = builder.NewCommandSTIBuilder(basename)
 	case "openshift-docker-build":
@@ -155,6 +158,7 @@ func newExperimentalCommand(name, fullName string) *cobra.Command {
 	experimental.AddCommand(tokens.NewCmdTokens(tokens.TokenRecommendedCommandName, fullName+" "+tokens.TokenRecommendedCommandName, f, out))
 	experimental.AddCommand(exipfailover.NewCmdIPFailoverConfig(f, fullName, "ipfailover", out))
 	experimental.AddCommand(buildchain.NewCmdBuildChain(name, fullName+" "+buildchain.BuildChainRecommendedCommandName, f, out))
+	experimental.AddCommand(diagnostics.NewCommandDiagnostics("diagnostics", fullName+" diagnostics", out))
 	experimental.AddCommand(cmd.NewCmdOptions(out))
 	return experimental
 }
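
These two hunks register the new command both ways it can be reached: via the `openshift-diagnostics` basename and under `openshift ex`. The basename dispatch itself is just a switch on the invoked program name; a stdlib-only illustration (the printed behavior is hypothetical):

    package main

    import (
        "fmt"
        "os"
        "path/filepath"
    )

    func main() {
        // Symlinking one binary under several names selects a personality,
        // just as CommandFor switches on the basename above.
        switch basename := filepath.Base(os.Args[0]); basename {
        case "openshift-diagnostics":
            fmt.Println("would build and run the diagnostics command tree")
        default:
            fmt.Println("would build the full command set for", basename)
        }
    }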
new file mode 100644
@@ -0,0 +1,84 @@
+OpenShift v3 Diagnostics
+========================
+
+This is a tool to help administrators and users resolve common problems
+that occur with OpenShift v3 deployments. It is currently (May 2015)
+under continuous development as the OpenShift Origin project progresses.
+
+The goals of the diagnostics tool are summarized in this [Trello
+card](https://trello.com/c/LdUogKuN). Diagnostics are included as an
+`openshift` binary sub-command that analyzes OpenShift as it finds it,
+whether from the perspective of an OpenShift client or on an OpenShift
+host.
+
+Expected environment
+====================
+
+OpenShift can be deployed in many ways: built from source, included
+in a VM image, in a Docker image, or as enterprise RPMs. Each of these
+implies a different configuration and environment. To keep
+assumptions about the environment to a minimum, the diagnostics have been
+added to the `openshift` binary itself so that wherever there is an
+OpenShift server or client, the diagnostics can run in the exact same
+environment.
+
+`openshift ex diagnostics` subcommands for master, node, and client
+provide flags to mimic the configurations of those respective components,
+so that running diagnostics against a component should be as simple as
+supplying the same flags that would invoke the component. For
+example, if a master is started with:
+
+    openshift start master --public-hostname=...
+
+then diagnostics against that master would simply be run as:
+
+    openshift ex diagnostics master --public-hostname=...
+
+In this way it should be possible to invoke diagnostics against any
+given environment.
+
+Host environment
+================
+
+However, master/node diagnostics will be most useful in a specific
+target environment: a deployment using Enterprise RPMs and
+Ansible deployment logic. This provides two major benefits:
+
+* master/node configuration is based on a configuration file in a standard location
+* all components log to journald
+
+Having configuration files in standard locations means you will generally
+not even need to specify where to find them. Running:
+
+    openshift ex diagnostics
+
+by itself will look for master and node configs (in addition to the client
+config file) in the standard locations and use them if found, so this
+should make the Enterprise use case as simple as possible. It is also
+very easy to use configuration files when they are not in the expected
+Enterprise locations:
+
+    openshift ex diagnostics --master-config=... --node-config=...
+
+Having logs in journald is necessary for the current log analysis
+logic. Other usage may send logs to files, to stdout, or combine
+node/master output... it may not be too hard to extend analysis to
+other log sources, but the priority has been to look at journald logs
+as created by components in Enterprise deployments (including docker,
+openvswitch, etc.).
+
+Client environment
+==================
+
+The user may have access only as an ordinary user, as a cluster-admin
+user, or may have admin access on a host where OpenShift master or node
+services are operating. The diagnostics will attempt to use as much
+access as the user has available.
+
+A client with ordinary access should be able to diagnose its connection
+to the master and look for problems in builds and deployments.
+
+A client with cluster-admin access should be able to diagnose the same
+things for every project in the deployment, as well as infrastructure
+status.
+
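
The README's journald requirement is worth a concrete illustration: a diagnostic that analyzes unit logs can shell out to journalctl. A hedged sketch (the unit name is an assumption about an Enterprise-style host, not something this commit defines):

    package main

    import (
        "fmt"
        "os/exec"
    )

    func main() {
        // Assumed systemd unit name for a master; real deployments may differ.
        unit := "openshift-master"

        // -n 50 limits output to recent entries; --no-pager keeps it pipe-friendly.
        out, err := exec.Command("journalctl", "-u", unit, "-n", "50", "--no-pager").CombinedOutput()
        if err != nil {
            fmt.Printf("could not read journald logs for %s: %v\n", unit, err)
            return
        }
        fmt.Printf("last entries for %s:\n%s", unit, out)
    }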
new file mode 100644
@@ -0,0 +1,293 @@
+package client
+
+import (
+	"fmt"
+	kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	kerrs "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
+	client "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
+	clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
+	osclient "github.com/openshift/origin/pkg/client"
+	"github.com/openshift/origin/pkg/diagnostics/discovery"
+	"github.com/openshift/origin/pkg/diagnostics/log"
+	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
+	osapi "github.com/openshift/origin/pkg/image/api"
+	"reflect"
+	"strings"
+)
+
+var Diagnostics = map[string]diagnostic.Diagnostic{
+	"NodeDefinitions": {
+		Description: "Check node records on master",
+		Condition: func(env *discovery.Environment) (skip bool, reason string) {
+			if env.ClusterAdminFactory == nil {
+				return true, "Client does not have cluster-admin access and cannot see node records"
+			}
+			return false, ""
+		},
+		Run: func(env *discovery.Environment) {
+			var nodes *kapi.NodeList
+			_, kclient, err := env.ClusterAdminFactory.Clients()
+			if err == nil {
+				nodes, err = kclient.Nodes().List(labels.LabelSelector{}, fields.Everything())
+			}
+			if err != nil {
+				env.Log.Errorf("clGetNodesFailed", `
+Client error while retrieving node records. Client retrieved records
+during discovery, so this is likely to be a transient error. Try running
+diagnostics again. If this message persists, there may be a permissions
+problem with getting node records. The error was:
+
+(%T) %[1]v`, err)
+				return
+			}
+			for _, node := range nodes.Items {
+				//pretty.Println("Node record:", node)
+				var ready *kapi.NodeCondition
+				for i, condition := range node.Status.Conditions {
+					switch condition.Type {
+					// currently only one... used to be more, may be again
+					case kapi.NodeReady:
+						ready = &node.Status.Conditions[i]
+					}
+				}
+				//pretty.Println("Node conditions for "+node.Name, ready, schedulable)
+				if ready == nil || ready.Status != kapi.ConditionTrue {
+					msg := log.Msg{
+						"node": node.Name,
+						"tmpl": `
+Node {{.node}} is defined but is not marked as ready.
+Ready status is {{.status}} because "{{.reason}}"
+If the node is not intentionally disabled, check that the master can
+reach the node hostname for a health check and the node is checking in
+to the master with the same hostname.
+
+While in this state, pods should not be scheduled to deploy on the node,
+and any existing scheduled pods will be considered failed and removed.
+ `,
+					}
+					if ready == nil {
+						msg["status"] = "None"
+						msg["reason"] = "There is no readiness record."
+					} else {
+						msg["status"] = ready.Status
+						msg["reason"] = ready.Reason
+					}
+					env.Log.Warnm("clNodeBroken", msg)
+				}
+			}
+		},
+	},
+
+	"ConfigContexts": {
+		Description: "Test that client config contexts have no undefined references",
+		Condition: func(env *discovery.Environment) (skip bool, reason string) {
+			if env.ClientConfigRaw == nil {
+				return true, "There is no client config file"
+			}
+			return false, ""
+		},
+		Run: func(env *discovery.Environment) {
+			cc := env.ClientConfigRaw
+			current := cc.CurrentContext
+			ccSuccess := false
+			var ccResult log.Msg //nil
+			for context := range cc.Contexts {
+				result, success := TestContext(context, cc)
+				msg := log.Msg{"tmpl": "For client config context '{{.context}}':{{.result}}", "context": context, "result": result}
+				if context == current {
+					ccResult, ccSuccess = msg, success
+				} else if success {
+					env.Log.Infom("clientCfgSuccess", msg)
+				} else {
+					env.Log.Warnm("clientCfgWarn", msg)
+				}
+			}
+			if _, exists := cc.Contexts[current]; exists {
+				ccResult["tmpl"] = `
+The current context from client config is '{{.context}}'
+This will be used by default to contact your OpenShift server.
+` + ccResult["tmpl"].(string)
+				if ccSuccess {
+					env.Log.Infom("currentccSuccess", ccResult)
+				} else {
+					env.Log.Errorm("currentccWarn", ccResult)
+				}
+			} else { // context does not exist
+				env.Log.Errorm("cConUndef", log.Msg{"tmpl": `
+Your client config specifies a current context of '{{.context}}'
+which is not defined; it is likely that a mistake was introduced while
+manually editing your config. If this is a simple typo, you may be
+able to fix it manually.
+The OpenShift master creates a fresh config when it is started; it may be
+useful to use this as a base if available.`, "context": current})
+			}
+		},
+	},
+
+	"ClusterRegistry": {
+		Description: "Check there is a working Docker registry",
+		Condition: func(env *discovery.Environment) (skip bool, reason string) {
+			if env.ClusterAdminFactory == nil {
+				return true, "Client does not have cluster-admin access and cannot see registry objects"
+			}
+			return false, ""
+		},
+		Run: func(env *discovery.Environment) {
+			osClient, kclient, err := env.ClusterAdminFactory.Clients()
+			if err != nil {
+				env.Log.Errorf("clGetClientFailed", "Constructing clients failed. This should never happen. Error: (%T) %[1]v", err)
+				return
+			}
+			// retrieve the service if it exists
+			if service := getRegistryService(kclient, env.Log); service != nil {
+				// Check that it actually has a pod selected that's running
+				if pod := getRegistryPod(kclient, service, env.Log); pod != nil {
+					// Check that an endpoint exists on the service
+					if endPoint := getRegistryEndpoint(kclient, env.Log); endPoint != nil {
+						// TODO: Check that endpoints on the service match the pod (hasn't been a problem yet though)
+						// TODO: Check the logs for that pod for common issues (credentials, DNS resolution failure)
+						// attempt to create an imagestream and see if it gets the same registry service IP from the service cache
+						testRegistryImageStream(osClient, service, env.Log)
+					}
+				}
+			}
+
+		},
+	},
+}
+
+func TestContext(contextName string, config *clientcmdapi.Config) (result string, success bool) {
+	context, exists := config.Contexts[contextName]
+	if !exists {
+		return "client config context '" + contextName + "' is not defined.", false
+	}
+	clusterName := context.Cluster
+	cluster, exists := config.Clusters[clusterName]
+	if !exists {
+		return fmt.Sprintf("client config context '%s' has a cluster '%s' which is not defined.", contextName, clusterName), false
+	}
+	authName := context.AuthInfo
+	if _, exists := config.AuthInfos[authName]; !exists {
+		return fmt.Sprintf("client config context '%s' has a user identity '%s' which is not defined.", contextName, authName), false
+	}
+	project := context.Namespace
+	if project == "" {
+		project = kapi.NamespaceDefault // OpenShift/k8s fills this in if missing
+	}
+	// TODO: actually send a request to see if can connect
+	return fmt.Sprintf(`
+The server URL is '%s'
+The user authentication is '%s'
+The current project is '%s'`, cluster.Server, authName, project), true
+}
+
+func getRegistryService(kclient *client.Client, logger *log.Logger) *kapi.Service {
+	service, err := kclient.Services("default").Get("docker-registry")
+	if err != nil && reflect.TypeOf(err) == reflect.TypeOf(&kerrs.StatusError{}) {
+		logger.Warnf("clGetRegFailed", `
+There is no "docker-registry" service. This is not strictly required
+to use OpenShift, however it is required for builds and its absence
+probably indicates an incomplete installation of OpenShift.
+
+Please use the 'osadm registry' command to create a registry.
+				`)
+		return nil
+	} else if err != nil {
+		logger.Errorf("clGetRegFailed", `
+Client error while retrieving registry service. Client retrieved records
+during discovery, so this is likely to be a transient error. Try running
+diagnostics again. If this message persists, there may be a permissions
+problem with getting records. The error was:
+
+(%T) %[1]v`, err)
+		return nil
+	}
+	logger.Debugf("clRegFound", "Found docker-registry service with ports %v", service.Spec.Ports)
+	return service
+}
+
+func getRegistryPod(kclient *client.Client, service *kapi.Service, logger *log.Logger) *kapi.Pod {
+	pods, err := kclient.Pods("default").List(labels.SelectorFromSet(service.Spec.Selector), fields.Everything())
+	if err != nil {
+		logger.Errorf("clRegListPods", "Finding pods for 'docker-registry' service failed. This should never happen. Error: (%T) %[1]v", err)
+		return nil
+	} else if len(pods.Items) < 1 {
+		logger.Error("clRegNoPods", `
+The "docker-registry" service exists but has no associated pods, so it
+is not available. Builds and deployments that use the registry will fail.`)
+		return nil
+	} else if len(pods.Items) > 1 {
+		logger.Error("clRegNoPods", `
+The "docker-registry" service has multiple associated pods. Load-balanced
+registries are not yet available, so these are likely to have incomplete
+stores of images. Builds and deployments that use the registry will
+fail sporadically.`)
+		return nil
+	}
+	pod := &pods.Items[0]
+	if pod.Status.Phase != kapi.PodRunning {
+		logger.Errorf("clRegPodDown", `
+The "%s" pod for the "docker-registry" service is not running.
+This may be transient, a scheduling error, or something else.
+Builds and deployments that require the registry will fail.`, pod.ObjectMeta.Name)
+		return nil
+	}
+	logger.Debugf("clRegPodFound", "Found docker-registry pod with name %s", pod.ObjectMeta.Name)
+	return pod
+}
+
+func getRegistryEndpoint(kclient *client.Client, logger *log.Logger) *kapi.Endpoints {
+	endPoint, err := kclient.Endpoints("default").Get("docker-registry")
+	if err != nil {
+		logger.Errorf("clRegGetEP", "Finding endpoints for 'docker-registry' service failed. This should never happen. Error: (%T) %[1]v", err)
+		return nil
+	} else if len(endPoint.Subsets) != 1 || len(endPoint.Subsets[0].Addresses) != 1 {
+		logger.Warn("clRegNoEP", `
+The "docker-registry" service exists with one associated pod, but the
+number of endpoints in the "docker-registry" endpoint object does not
+match. This mismatch probably indicates a bug in OpenShift and it is
+likely that builds and deployments that require the registry will fail.`)
+		return nil
+	}
+	logger.Debugf("clRegPodFound", "Found docker-registry endpoint object")
+	return endPoint
+}
+
+func testRegistryImageStream(client *osclient.Client, service *kapi.Service, logger *log.Logger) {
+	imgStream, err := client.ImageStreams("default").Create(&osapi.ImageStream{ObjectMeta: kapi.ObjectMeta{GenerateName: "diagnostic-test-"}})
+	if err != nil {
+		logger.Errorf("clRegISCFail", "Creating test ImageStream failed. Error: (%T) %[1]v", err)
+		return
+	}
+	defer client.ImageStreams("default").Delete(imgStream.ObjectMeta.Name)         // TODO: report if deleting fails
+	imgStream, err = client.ImageStreams("default").Get(imgStream.ObjectMeta.Name) // status is filled in post-create
+	if err != nil {
+		logger.Errorf("clRegISCFail", "Getting created test ImageStream failed. Error: (%T) %[1]v", err)
+		return
+	}
+	logger.Debugf("clRegISC", "Created test ImageStream: %[1]v", imgStream)
+	cacheHost := strings.SplitN(imgStream.Status.DockerImageRepository, "/", 2)[0]
+	serviceHost := fmt.Sprintf("%s:%d", service.Spec.PortalIP, service.Spec.Ports[0].Port)
+	if cacheHost != serviceHost {
+		logger.Errorm("clRegISMismatch", log.Msg{
+			"serviceHost": serviceHost,
+			"cacheHost":   cacheHost,
+			"tmpl": `
+Diagnostics created a test ImageStream and compared the registry IP
+it received to the registry IP available via the docker-registry service.
+
+docker-registry      : {{.serviceHost}}
+ImageStream registry : {{.cacheHost}}
+
+They differ, which probably means that an administrator re-created
+the docker-registry service but the master has cached the old service
+IP address. Builds or deployments that use ImageStreams with the wrong
+docker-registry IP will fail under this condition.
+
+To resolve this issue, restarting the master (to clear the cache) should
+be sufficient.
+`})
+	}
+}
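
Each entry in the Diagnostics map above pairs a Description with an optional Condition gate and a Run body that operates on the shared discovery environment. Stripped of the OpenShift-specific types, the dispatch pattern looks roughly like this (Environment and Diagnostic here are local stand-ins, not the real discovery/diagnostic types):

    package main

    import "fmt"

    // Local stand-ins for discovery.Environment and diagnostic.Diagnostic.
    type Environment struct {
        ClusterAdmin bool
    }

    type Diagnostic struct {
        Description string
        Condition   func(env *Environment) (skip bool, reason string)
        Run         func(env *Environment)
    }

    var diagnostics = map[string]Diagnostic{
        "NodeDefinitions": {
            Description: "Check node records on master",
            Condition: func(env *Environment) (bool, string) {
                if !env.ClusterAdmin {
                    return true, "requires cluster-admin access"
                }
                return false, ""
            },
            Run: func(env *Environment) { fmt.Println("checking node records...") },
        },
    }

    func main() {
        env := &Environment{ClusterAdmin: false}
        for name, d := range diagnostics {
            // Skip a diagnostic when its precondition is not met.
            if d.Condition != nil {
                if skip, reason := d.Condition(env); skip {
                    fmt.Printf("skipping %s: %s\n", name, reason)
                    continue
                }
            }
            fmt.Printf("running %s: %s\n", name, d.Description)
            d.Run(env)
        }
    }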
new file mode 100644
@@ -0,0 +1,104 @@
+package discovery // client
+
+import (
+	"fmt"
+	"github.com/openshift/origin/pkg/diagnostics/log"
+	"github.com/openshift/origin/pkg/diagnostics/types"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strings"
+)
+
+// ----------------------------------------------------------
+// Look for 'osc' and 'openshift' executables
+func (env *Environment) DiscoverClient() error {
+	var err error
+	f := env.Options.ClientDiagOptions.Factory
+	if config, err := f.OpenShiftClientConfig.RawConfig(); err != nil {
+		env.Log.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err)
+	} else {
+		env.OsConfig = &config
+		env.FactoryForContext[config.CurrentContext] = f
+	}
+	env.Log.Debug("discSearchExec", "Searching for executables in path:\n  "+strings.Join(filepath.SplitList(os.Getenv("PATH")), "\n  ")) //TODO for non-Linux OS
+	env.OscPath = env.findExecAndLog("osc")
+	if env.OscPath != "" {
+		env.OscVersion, err = getExecVersion(env.OscPath, env.Log)
+	}
+	env.OpenshiftPath = env.findExecAndLog("openshift")
+	if env.OpenshiftPath != "" {
+		env.OpenshiftVersion, err = getExecVersion(env.OpenshiftPath, env.Log)
+	}
+	if env.OpenshiftVersion.NonZero() && env.OscVersion.NonZero() && !env.OpenshiftVersion.Eq(env.OscVersion) {
+		env.Log.Warnm("discVersionMM", log.Msg{"osV": env.OpenshiftVersion.GoString(), "oscV": env.OscVersion.GoString(),
+			"text": fmt.Sprintf("'openshift' version %#v does not match 'osc' version %#v; update or remove the lower version", env.OpenshiftVersion, env.OscVersion)})
+	}
+	return err
+}
+
+// ----------------------------------------------------------
+// Look for a specific executable and log what happens
+func (env *Environment) findExecAndLog(cmd string) string {
+	if path := findExecFor(cmd); path != "" {
+		env.Log.Infom("discExecFound", log.Msg{"command": cmd, "path": path, "tmpl": "Found '{{.command}}' at {{.path}}"})
+		return path
+	} else {
+		env.Log.Warnm("discExecNoPath", log.Msg{"command": cmd, "tmpl": "No '{{.command}}' executable was found in your path"})
+	}
+	return ""
+}
+
+// ----------------------------------------------------------
+// Look in the path for an executable
+func findExecFor(cmd string) string {
+	path, err := exec.LookPath(cmd)
+	if err == nil {
+		return path
+	}
+	if runtime.GOOS == "windows" {
+		path, err = exec.LookPath(cmd + ".exe")
+		if err == nil {
+			return path
+		}
+	}
+	return ""
+}
+
+// ----------------------------------------------------------
+// Invoke executable's "version" command to determine version
+func getExecVersion(path string, logger *log.Logger) (version types.Version, err error) {
+	cmd := exec.Command(path, "version")
+	var out []byte
+	out, err = cmd.CombinedOutput()
+	if err == nil {
+		var name string
+		var x, y, z int
+		if scanned, err := fmt.Sscanf(string(out), "%s v%d.%d.%d", &name, &x, &y, &z); scanned > 1 {
+			version = types.Version{x, y, z}
+			logger.Infom("discVersion", log.Msg{"tmpl": "version of {{.command}} is {{.version}}", "command": name, "version": version.GoString()})
+		} else {
+			logger.Errorf("discVersErr", `
+Expected version output from '%s version'
+Could not parse output received:
+%v
+Error was: %#v`, path, string(out), err)
+		}
+	} else {
+		switch err.(type) {
+		case *exec.Error:
+			logger.Errorf("discVersErr", "error in executing '%v version': %v", path, err)
+		case *exec.ExitError:
+			logger.Errorf("discVersErr", `
+Executed '%v version' which exited with an error code.
+This version is likely old or broken.
+Error was '%v';
+Output was:
+%v`, path, err.Error(), log.LimitLines(string(out), 5))
+		default:
+			logger.Errorf("discVersErr", "executed '%v version' but an error occurred:\n%v\nOutput was:\n%v", path, err, string(out))
+		}
+	}
+	return version, err
+}
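
getExecVersion above leans on fmt.Sscanf to pull a dotted version out of the first line of `openshift version` output, treating the parse as successful when more than one item converts. That behavior is easy to verify standalone (the sample output string is illustrative):

    package main

    import "fmt"

    func main() {
        // Sample "openshift version" output line (illustrative).
        out := "openshift v0.4.3"

        var name string
        var x, y, z int
        // Sscanf stops at the first mismatch; scanned counts converted items.
        scanned, err := fmt.Sscanf(out, "%s v%d.%d.%d", &name, &x, &y, &z)
        if scanned > 1 {
            fmt.Printf("parsed %s version %d.%d.%d\n", name, x, y, z)
        } else {
            fmt.Println("could not parse version:", err)
        }
    }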
new file mode 100644
... ...
@@ -0,0 +1,378 @@
0
+package discovery // config
1
+
2
+import (
3
+	"fmt"
4
+	kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
5
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd"
6
+	clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
7
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
8
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
9
+	"github.com/openshift/origin/pkg/cmd/cli/config"
10
+	"github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
11
+	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
12
+	"github.com/openshift/origin/pkg/diagnostics/log"
13
+	"io/ioutil"
14
+	"os"
15
+	"regexp"
16
+	"strings"
17
+)
18
+
19
+/* ----------------------------------------------------------
20
+Look for the client config and try to read it.
21
+
22
+We will look in the standard locations, alert the user to what we find
23
+as we go along, and try to be helpful.
24
+*/
25
+
26
+// -------------------------------------------------------------
27
+// Look for client config file in a number of possible locations
28
+func (env *Environment) ReadClientConfigFiles() {
29
+	confFlagName := options.FlagAllClientConfigName
30
+	confFlag := env.Options.ClientConfigPath // from openshift-diagnostics --client-config
31
+	if flags := env.Options.GlobalFlags; flags != nil {
32
+		name := config.OpenShiftConfigFlagName
33
+		if flag := env.Options.GlobalFlags.Lookup(name); flag != nil {
34
+			confFlag = flag.Value.String() // from openshift-diagnostics client --config
35
+			confFlagName = name
36
+		}
37
+	}
38
+	var found bool
39
+	rules := config.NewOpenShiftClientConfigLoadingRules()
40
+	paths := append([]string{confFlag}, rules.Precedence...)
41
+	for index, path := range paths {
42
+		errmsg := ""
43
+		switch index {
44
+		case 0:
45
+			errmsg = fmt.Sprintf("--"+confFlagName+" specified that client config should be at %s\n", path)
46
+		case len(paths) - 1:
47
+			// do nothing, the config wasn't found in ~
48
+		default:
49
+			if len(os.Getenv(config.OpenShiftConfigPathEnvVar)) != 0 {
50
+				errmsg = fmt.Sprintf("$OPENSHIFTCONFIG specified that client config should be at %s\n", path)
51
+			}
52
+		}
53
+
54
+		if rawConfig := openConfigFile(path, errmsg, env.Log); rawConfig != nil && !found {
55
+			found = true
56
+			env.ClientConfigPath = path
57
+			env.ClientConfigRaw = rawConfig
58
+		}
59
+	}
60
+	if found {
61
+		if confFlag != "" && confFlag != env.ClientConfigPath {
62
+			// found config but not where --config said, so don't continue discovery
63
+			env.Log.Errorf("discCCnotFlag", `
64
+The client configuration file was not found where the --%s flag indicated:
65
+  %s
66
+A config file was found at the following location:
67
+  %s
68
+If you wish to use this file for client configuration, you can specify it
69
+with the --%[1]s flag, or just not specify the flag.
70
+			`, confFlagName, confFlag, env.ClientConfigPath)
71
+		} else {
72
+			// happy path, client config found as expected
73
+			env.WillCheck[ClientTarget] = true
74
+		}
75
+	} else { // not found, decide what to do
76
+		if confFlag != "" { // user expected conf file at specific place
77
+			env.Log.Errorf("discNoCC", "The client configuration file was not found where --%s='%s' indicated.", confFlagName, confFlag)
78
+		} else if !env.Options.ClientDiagOptions.MustCheck {
79
+			env.Log.Notice("discSkipCLI", "No client config file found; client diagnostics will not be performed.")
80
+		} else {
81
+			// user specifically wants to troubleshoot client, but no conf file given
82
+			env.Log.Warn("discNoCCfile", "No client config file read; OpenShift client diagnostics will use flags and default configuration.")
83
+			env.WillCheck[ClientTarget] = true
84
+			adminPaths := []string{
85
+				"/etc/openshift/master/admin.kubeconfig",           // enterprise
86
+				"/openshift.local.config/master/admin.kubeconfig",  // origin systemd
87
+				"./openshift.local.config/master/admin.kubeconfig", // origin binary
88
+			}
89
+			adminWarningF := `
90
+No client config file was available; however, one exists at
91
+  %[1]s
92
+which is a standard location where the master generates it.
93
+If this is what you want to use, you should copy it to a standard location
94
+(~/.config/openshift/.config, or the current directory), or you can set the
95
+environment variable OPENSHIFTCONFIG in your ~/.bash_profile:
96
+  export OPENSHIFTCONFIG=%[1]s
97
+If this is not what you want, you should obtain a config file and
98
+place it in a standard location.
99
+`
100
+			// look for it in auto-generated locations when not found properly
101
+			for _, path := range adminPaths {
102
+				if conf := openConfigFile(path, "", env.Log); conf != nil {
103
+					env.Log.Warnf("discCCautoPath", adminWarningF, path)
104
+					break
105
+				}
106
+			}
107
+		}
108
+	}
109
+}
110
+
111
+// ----------------------------------------------------------
112
+// Attempt to open file at path as client config
113
+// If there is a problem and errmsg is set, log an error
114
+func openConfigFile(path string, errmsg string, logger *log.Logger) *clientcmdapi.Config {
115
+	var err error
116
+	var file *os.File
117
+	if path == "" { // empty param/envvar
118
+		return nil
119
+	} else if file, err = os.Open(path); err == nil {
120
+		logger.Debugm("discOpenCC", log.Msg{"tmpl": "Reading client config at {{.path}}", "path": path})
121
+	} else if errmsg == "" {
122
+		logger.Debugf("discOpenCCNo", "Could not read client config at %s:\n%#v", path, err)
123
+	} else if os.IsNotExist(err) {
124
+		logger.Error("discOpenCCNoExist", errmsg+"but that file does not exist.")
125
+	} else if os.IsPermission(err) {
126
+		logger.Error("discOpenCCNoPerm", errmsg+"but lack permission to read that file.")
127
+	} else {
128
+		logger.Errorf("discOpenCCErr", "%sbut there was an error opening it:\n%#v", errmsg, err)
129
+	}
130
+	if file != nil { // it is open for reading
131
+		defer file.Close()
132
+		if buffer, err := ioutil.ReadAll(file); err != nil {
133
+			logger.Errorf("discCCReadErr", "Unexpected error while reading client config file (%s): %v", path, err)
134
+		} else if conf, err := clientcmd.Load(buffer); err != nil {
135
+			logger.Errorf("discCCYamlErr", `
136
+Error reading YAML from client config file (%s):
137
+  %v
138
+This file may have been truncated or mis-edited.
139
+Please fix, remove, or obtain a new client config`, file.Name(), err)
140
+		} else {
141
+			logger.Infom("discCCRead", log.Msg{"tmpl": `Successfully read a client config file at '{{.path}}'`, "path": path})
142
+			/* Note, we're not going to use this config file directly.
143
+			 * Instead, we'll defer to the openshift client code to assimilate
144
+			 * flags, env vars, and the potential hierarchy of config files
145
+			 * into an actual configuration that the client uses.
146
+			 * However, for diagnostic purposes, record the first we find.
147
+			 */
148
+			return conf
149
+		}
150
+	}
151
+	return nil
152
+}
153
+
154
+/* The full client configuration may specify multiple contexts, each
155
+ * of which could be a different server, a different user, a different
156
+ * default project. We want to check which contexts have useful access,
157
+ * and record those. At this point, we should already have the factory
158
+ * for the current context. Factories embed config and a client cache,
159
+ * and since we want to do discovery for every available context, we are
160
+ * going to create a factory for each context. We will determine which
161
+ * context actually has access to the default project, preferring the
162
+ * current (default) context if it does. Connection errors should be
163
+ * diagnosed along the way.
164
+ */
165
+func (env *Environment) ConfigClient() {
166
+	if env.OsConfig != nil {
167
+		// TODO: run these in parallel, with a time limit so connection timeouts don't take forever
168
+		for cname, context := range env.OsConfig.Contexts {
169
+			// set context, create factory, see what's available
170
+			if env.FactoryForContext[cname] == nil {
171
+				//config := clientcmd.NewNonInteractiveClientConfig(env.Factory.OpenShiftClientConfig, cname, &clientcmd.ConfigOverrides{})
172
+				config := clientcmd.NewNonInteractiveClientConfig(*env.OsConfig, cname, &clientcmd.ConfigOverrides{})
173
+				f := osclientcmd.NewFactory(config)
174
+				//f.BindFlags(env.Flags.OpenshiftFlags)
175
+				env.FactoryForContext[cname] = f
176
+			}
177
+			if access := getContextAccess(env.FactoryForContext[cname], cname, context, env.Log); access != nil {
178
+				env.AccessForContext[cname] = access
179
+				if access.ClusterAdmin && (cname == env.OsConfig.CurrentContext || env.ClusterAdminFactory == nil) {
180
+					env.ClusterAdminFactory = env.FactoryForContext[cname]
181
+				}
182
+			}
183
+		}
184
+	}
185
+}
186
+
187
+// for now, only try to determine what namespaces a user can see
188
+func getContextAccess(factory *osclientcmd.Factory, ctxName string, ctx clientcmdapi.Context, logger *log.Logger) *ContextAccess {
189
+	// start by getting ready to log the result
190
+	msgText := "Testing client config context {{.context}}\nServer: {{.server}}\nUser: {{.user}}\n\n"
191
+	msg := log.Msg{"id": "discCCctx", "tmpl": msgText}
192
+	if config, err := factory.OpenShiftClientConfig.RawConfig(); err != nil {
193
+		logger.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err)
194
+		return nil
195
+	} else {
196
+		msg["context"] = ctxName
197
+		msg["server"] = config.Clusters[ctx.Cluster].Server
198
+		msg["user"] = ctx.AuthInfo
199
+	}
200
+	// actually go and request project list from the server
201
+	if osclient, _, err := factory.Clients(); err != nil {
202
+		logger.Errorf("discCCctxClients", "Failed to create client during discovery with error:\n(%T) %[1]v\nThis is probably an OpenShift bug.", err)
203
+		return nil
204
+	} else if projects, err := osclient.Projects().List(labels.Everything(), fields.Everything()); err == nil { // success!
205
+		list := projects.Items
206
+		if len(list) == 0 {
207
+			msg["tmpl"] = msgText + "Successfully requested project list, but it is empty, so user has no access to anything."
208
+			msg["projects"] = make([]string, 0)
209
+			logger.Infom("discCCctxSuccess", msg)
210
+			return nil
211
+		}
212
+		access := &ContextAccess{Projects: make([]string, len(list))}
213
+		for i, project := range list {
214
+			access.Projects[i] = project.Name
215
+			if project.Name == kapi.NamespaceDefault {
216
+				access.ClusterAdmin = true
217
+			}
218
+		}
219
+		if access.ClusterAdmin {
220
+			msg["tmpl"] = msgText + "Successfully requested project list; has access to default project, so assumed to be a cluster-admin"
221
+			logger.Infom("discCCctxSuccess", msg)
222
+		} else {
223
+			msg["tmpl"] = msgText + "Successfully requested project list; has access to project(s): {{.projectStr}}"
224
+			msg["projects"] = access.Projects
225
+			msg["projectStr"] = strings.Join(access.Projects, ", ")
226
+			logger.Infom("discCCctxSuccess", msg)
227
+		}
228
+		return access
229
+	} else { // something went wrong, so diagnose it
230
+		noResolveRx := regexp.MustCompile("dial tcp: lookup (\\S+): no such host")
231
+		unknownCaMsg := "x509: certificate signed by unknown authority"
232
+		unneededCaMsg := "specifying a root certificates file with the insecure flag is not allowed"
233
+		invalidCertNameRx := regexp.MustCompile("x509: certificate is valid for (\\S+, )+not (\\S+)")
234
+		connRefusedRx := regexp.MustCompile("dial tcp (\\S+): connection refused")
235
+		connTimeoutRx := regexp.MustCompile("dial tcp (\\S+): (?:connection timed out|i/o timeout)")
236
+		unauthenticatedMsg := `403 Forbidden: Forbidden: "/osapi/v1beta1/projects?namespace=" denied by default`
237
+		unauthorizedRx := regexp.MustCompile("401 Unauthorized: Unauthorized$")
238
+
239
+		malformedHTTPMsg := "malformed HTTP response"
240
+		malformedTLSMsg := "tls: oversized record received with length"
241
+
242
+		// interpret the error message for mere mortals
243
+		errm := err.Error()
244
+		var reason, errId string
245
+		switch {
246
+		case noResolveRx.MatchString(errm):
247
+			errId, reason = "clientNoResolve", `
248
+This usually means that the hostname does not resolve to an IP.
249
+Hostnames should usually be resolved via DNS or an /etc/hosts file.
250
+Ensure that the hostname resolves correctly from your host before proceeding.
251
+Of course, your config could also simply have the wrong hostname specified.
252
+`
253
+		case strings.Contains(errm, unknownCaMsg):
254
+			errId, reason = "clientUnknownCa", `
255
+This means that we cannot validate the certificate in use by the
256
+OpenShift API server, so we cannot securely communicate with it.
257
+Connections could be intercepted and your credentials stolen.
258
+
259
+Since the server certificate we see when connecting is not validated
260
+by public certificate authorities (CAs), you probably need to specify a
261
+certificate from a private CA to validate the connection.
262
+
263
+Your config may be specifying the wrong CA cert, or none, or there
264
+could actually be a man-in-the-middle attempting to intercept your
265
+connection.  If you are unconcerned about any of this, you can add the
266
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification,
267
+but this is risky and should not be necessary.
268
+** Connections could be intercepted and your credentials stolen. **
269
+`
270
+		case strings.Contains(errm, unneededCaMsg):
271
+			errId, reason = "clientUnneededCa", `
272
+This means that for client connections to the OpenShift API server, you
273
+(or your kubeconfig) specified both a validating certificate authority
274
+and that the client should bypass connection security validation.
275
+
276
+This is not allowed because it is likely to be a mistake.
277
+
278
+If you want to use --insecure-skip-tls-verify to bypass security (which
279
+is usually a bad idea anyway), then you need to also clear the CA cert
280
+from your command line options or kubeconfig file(s). Of course, it
281
+would be far better to obtain and use a correct CA cert.
282
+`
283
+		case invalidCertNameRx.MatchString(errm):
284
+			match := invalidCertNameRx.FindStringSubmatch(errm)
285
+			serverHost := match[len(match)-1]
286
+			errId, reason = "clientInvCertName", fmt.Sprintf(`
287
+This means that the certificate in use by the OpenShift API server
288
+(master) does not match the hostname by which you are addressing it:
289
+  %s
290
+so a secure connection is not allowed. In theory, this *could* mean that
291
+someone is intercepting your connection and presenting a certificate
292
+that is valid but for a different server, which is why secure validation
293
+fails in this case.
294
+
295
+However, the most likely explanation is that the server certificate
296
+needs to be updated to include the name you are using to reach it.
297
+
298
+If the OpenShift server is generating its own certificates (which
299
+is default), then the --public-master flag on the OpenShift master is
300
+usually the easiest way to do this. If you need something more complicated
301
+(for instance, multiple public addresses for the API, or your own CA),
302
+then you will need to custom-generate the server certificate with the
303
+right names yourself.
304
+
305
+If you are unconcerned about any of this, you can add the
306
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification,
307
+but this is risky and should not be necessary.
308
+** Connections could be intercepted and your credentials stolen. **
309
+`, serverHost)
310
+		case connRefusedRx.MatchString(errm):
311
+			errId, reason = "clientInvCertName", `
312
+This means that when we tried to connect to the OpenShift API
313
+server (master), we reached the host, but nothing accepted the port
314
+connection. This could mean that the OpenShift master is stopped, or
315
+that a firewall or security policy is blocking access at that port.
316
+
317
+You will not be able to connect or do anything at all with OpenShift
318
+until this server problem is resolved or you specify a corrected
319
+server address.`
320
+		case connTimeoutRx.MatchString(errm):
321
+			errId, reason = "clientConnTimeout", `
322
+This means that when we tried to connect to the OpenShift API server
323
+(master), we could not reach the host at all.
324
+* You may have specified the wrong host address.
325
+* This could mean the host is completely unavailable (down).
326
+* This could indicate a routing problem or a firewall that simply
327
+  drops requests rather than responding by resetting the connection.
328
+* It does not generally mean that DNS name resolution failed (which
329
+  would be a different error) though the problem could be that it
330
+  gave the wrong address.`
331
+		case strings.Contains(errm, malformedHTTPMsg):
332
+			errId, reason = "clientMalformedHTTP", `
333
+This means that when we tried to connect to the OpenShift API server
334
+(master) with a plain HTTP connection, the server did not speak
335
+HTTP back to us. The most common explanation is that a secure server
336
+is listening but you specified an http: connection instead of https:.
337
+There could also be another service listening at the intended port
338
+speaking some other protocol entirely.
339
+
340
+You will not be able to connect or do anything at all with OpenShift
341
+until this server problem is resolved or you specify a corrected
342
+server address.`
343
+		case strings.Contains(errm, malformedTLSMsg):
344
+			errId, reason = "clientMalformedTLS", `
345
+This means that when we tried to connect to the OpenShift API server
346
+(master) with a secure HTTPS connection, the server did not speak
347
+HTTPS back to us. The most common explanation is that the server
348
+listening at that port is not the secure server you expected - it
349
+may be a non-secure HTTP server or the wrong service may be
350
+listening there, or you may have specified an incorrect port.
351
+
352
+You will not be able to connect or do anything at all with OpenShift
353
+until this server problem is resolved or you specify a corrected
354
+server address.`
355
+		case strings.Contains(errm, unauthenticatedMsg):
356
+			errId, reason = "clientUnauthn", `
357
+This means that when we tried to make a request to the OpenShift API
358
+server, your kubeconfig did not present valid credentials to
359
+authenticate your client. Credentials generally consist of a client
360
+key/certificate or an access token. Your kubeconfig may not have
361
+presented any, or they may be invalid.`
362
+		case unauthorizedRx.MatchString(errm):
363
+			errId, reason = "clientUnauthz", `
364
+This means that when we tried to make a request to the OpenShift API
365
+server, the request required credentials that were not presented.
366
+This can happen when an authentication token expires. Try logging in
367
+with this user again.`
368
+		default:
369
+			errId, reason = "clientUnknownConnErr", `Diagnostics does not have an explanation for what this means. Please report this error so one can be added.`
370
+		}
371
+		errMsg := fmt.Sprintf("(%T) %[1]v", err)
372
+		msg["tmpl"] = msgText + errMsg + reason
373
+		msg["errMsg"] = errMsg
374
+		logger.Errorm(errId, msg)
375
+	}
376
+	return nil
377
+}
0 378
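The switch above classifies raw client connection errors by regex or substring match before explaining them in plain language. A minimal standalone sketch of that classification approach, using a fabricated error string rather than output from a real client:

    package main

    import (
    	"fmt"
    	"regexp"
    	"strings"
    )

    func main() {
    	// Same matching style as the diagnostic above; the error text below
    	// is a made-up sample, not a real client error.
    	noResolveRx := regexp.MustCompile(`dial tcp: lookup (\S+): no such host`)
    	unknownCaMsg := "x509: certificate signed by unknown authority"

    	errm := "dial tcp: lookup master.example.com: no such host"
    	switch {
    	case noResolveRx.MatchString(errm):
    		host := noResolveRx.FindStringSubmatch(errm)[1]
    		fmt.Printf("clientNoResolve: %q did not resolve to an IP\n", host)
    	case strings.Contains(errm, unknownCaMsg):
    		fmt.Println("clientUnknownCa: server cert signed by an unknown CA")
    	default:
    		fmt.Println("clientUnknownConnErr: no explanation available")
    	}
    }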
new file mode 100644
... ...
@@ -0,0 +1,80 @@
0
+package discovery
1
+
2
+import (
3
+	kclientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api"
4
+	"github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
5
+	mconfigapi "github.com/openshift/origin/pkg/cmd/server/api"
6
+	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
7
+	"github.com/openshift/origin/pkg/diagnostics/log"
8
+	"github.com/openshift/origin/pkg/diagnostics/types"
9
+)
10
+
11
+// One env instance is created and filled in by discovery.
12
+// Then it should be considered immutable while diagnostics use it.
13
+type Environment struct {
14
+	// the options that were set by command invocation
15
+	Options *options.AllDiagnosticsOptions
16
+
17
+	// used to print discovery and diagnostic logs
18
+	Log *log.Logger
19
+
20
+	// do we have enough config to diagnose master,node,client?
21
+	WillCheck map[Target]bool
22
+
23
+	// general system info
24
+	HasBash      bool                         // non-Linux clients will not have bash...
25
+	HasSystemd   bool                         // not even all Linux has systemd
26
+	SystemdUnits map[string]types.SystemdUnit // list of relevant units present on system
27
+
28
+	// outcome from looking for executables
29
+	OscPath          string
30
+	OscVersion       types.Version
31
+	OpenshiftPath    string
32
+	OpenshiftVersion types.Version
33
+
34
+	// saved results from client discovery
35
+	ClientConfigPath    string                          // first client config file found, if any
36
+	ClientConfigRaw     *kclientcmdapi.Config           // available to analyze ^^
37
+	OsConfig            *kclientcmdapi.Config           // actual merged client configuration
38
+	FactoryForContext   map[string]*osclientcmd.Factory // one for each known context
39
+	AccessForContext    map[string]*ContextAccess       // one for each context that has access to anything
40
+	ClusterAdminFactory *osclientcmd.Factory            // factory we will use for cluster-admin access (could easily be nil)
41
+
42
+	// saved results from master discovery
43
+	MasterConfig *mconfigapi.MasterConfig // actual config determined from flags/file
44
+
45
+	// saved results from node discovery
46
+	NodeConfig *mconfigapi.NodeConfig // actual config determined from flags/file
47
+}
48
+
49
+type ContextAccess struct {
50
+	Projects     []string
51
+	ClusterAdmin bool // has access to see stuff only cluster-admin should
52
+}
53
+
54
+func NewEnvironment(opts *options.AllDiagnosticsOptions, logger *log.Logger) *Environment {
55
+	return &Environment{
56
+		Options:           opts,
57
+		Log:               logger,
58
+		SystemdUnits:      make(map[string]types.SystemdUnit),
59
+		WillCheck:         make(map[Target]bool),
60
+		FactoryForContext: make(map[string]*osclientcmd.Factory),
61
+		AccessForContext:  make(map[string]*ContextAccess),
62
+	}
63
+}
64
+
65
+// helpful translator
66
+func (env *Environment) DefaultFactory() *osclientcmd.Factory {
67
+	if env.FactoryForContext != nil && env.OsConfig != nil { // no need to panic if missing...
68
+		return env.FactoryForContext[env.OsConfig.CurrentContext]
69
+	}
70
+	return nil
71
+}
72
+
73
+type Target string
74
+
75
+const (
76
+	ClientTarget Target = "client"
77
+	MasterTarget Target = "master"
78
+	NodeTarget   Target = "node"
79
+)
0 80
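WillCheck is a plain map keyed by Target, so any target that discovery never enabled simply reads as false. A tiny self-contained illustration of that lookup behavior (types copied from above):

    package main

    import "fmt"

    type Target string

    const (
    	ClientTarget Target = "client"
    	MasterTarget Target = "master"
    	NodeTarget   Target = "node"
    )

    func main() {
    	willCheck := map[Target]bool{ClientTarget: true}
    	for _, t := range []Target{ClientTarget, MasterTarget, NodeTarget} {
    		fmt.Printf("%s: %v\n", t, willCheck[t]) // missing keys read as false
    	}
    }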
new file mode 100644
... ...
@@ -0,0 +1,79 @@
0
+package discovery
1
+
2
+import (
3
+	mconfigapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
4
+	"github.com/openshift/origin/pkg/cmd/server/start"
5
+)
6
+
7
+const StandardMasterConfPath string = "/etc/openshift/master/master-config.yaml"
8
+
9
+func (env *Environment) DiscoverMaster() {
10
+	// first, determine if we even have a master config
11
+	options := env.Options.MasterDiagOptions
12
+	if env.Options.MasterConfigPath != "" { // specified master conf, it has to load or we choke
13
+		options.MasterStartOptions.MasterArgs = start.NewDefaultMasterArgs() // and don't set any args
14
+		if env.tryMasterConfig(true) {
15
+			env.WillCheck[MasterTarget] = true
16
+		}
17
+	} else { // user did not indicate config file
18
+		env.Log.Debug("discMCnofile", "No top-level --master-config file specified")
19
+		if !options.MustCheck {
20
+			// general command, user couldn't indicate server flags;
21
+			// look for master config in standard location(s)
22
+			env.tryStandardMasterConfig() // or give up.
23
+		} else { // assume user provided flags like actual master.
24
+			env.tryMasterConfig(true)
25
+			env.WillCheck[MasterTarget] = true // regardless
26
+		}
27
+	}
28
+	if !env.WillCheck[MasterTarget] {
29
+		env.Log.Notice("discMCnone", "No master config found; master diagnostics will not be performed.")
30
+	}
31
+}
32
+
33
+func (env *Environment) tryMasterConfig(errOnFail bool) bool /* worked? */ {
34
+	options := env.Options.MasterDiagOptions.MasterStartOptions
35
+	logOnFail := env.Log.Debugf
36
+	if errOnFail {
37
+		logOnFail = env.Log.Errorf
38
+	}
39
+	if err := options.Complete(); err != nil {
40
+		logOnFail("discMCstart", "Could not read master config options: (%T) %[1]v", err)
41
+		return false
42
+	} else if err = options.Validate([]string{}); err != nil {
43
+		logOnFail("discMCstart", "Could not read master config options: (%T) %[1]v", err)
44
+		return false
45
+	}
46
+	var err error
47
+	if path := options.ConfigFile; path != "" {
48
+		env.Log.Debugf("discMCfile", "Looking for master config file at '%s'", path)
49
+		if env.MasterConfig, err = mconfigapilatest.ReadAndResolveMasterConfig(path); err != nil {
50
+			logOnFail("discMCfail", "Could not read master config file '%s':\n(%T) %[2]v", path, err)
51
+			return false
52
+		}
53
+		env.Log.Infof("discMCfound", "Found a master config file:\n%[1]s", path)
54
+		return true
55
+	} else {
56
+		if env.MasterConfig, err = options.MasterArgs.BuildSerializeableMasterConfig(); err != nil {
57
+			logOnFail("discMCopts", "Could not build a master config from flags:\n(%T) %[1]v", err)
58
+			return false
59
+		}
60
+		env.Log.Infof("discMCfound", "No master config file, using any flags for configuration.")
61
+	}
62
+	return true
63
+}
64
+
65
+func (env *Environment) tryStandardMasterConfig() bool /* worked? */ {
66
+	env.Log.Debug("discMCnoflags", "No master config flags specified, will try standard config location")
67
+	options := env.Options.MasterDiagOptions.MasterStartOptions
68
+	options.ConfigFile = StandardMasterConfPath
69
+	options.MasterArgs = start.NewDefaultMasterArgs()
70
+	if env.tryMasterConfig(false) {
71
+		env.Log.Debug("discMCdefault", "Using master config file at "+StandardMasterConfPath)
72
+		env.WillCheck[MasterTarget] = true
73
+		return true
74
+	} else { // otherwise, we just don't do master diagnostics
75
+		env.Log.Debugf("discMCnone", "Not using master config file at "+StandardMasterConfPath+" - will not do master diagnostics.")
76
+	}
77
+	return false
78
+}
0 79
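The flow above distinguishes an explicitly requested config file (which must load, or an error is logged) from the standard location (which is probed quietly and skipped if absent). A standalone sketch of that fallback policy, using os.Stat in place of actually parsing a master config:

    package main

    import (
    	"fmt"
    	"os"
    )

    // findConfig mirrors the policy above: an explicit path must exist,
    // while the standard path is only tried opportunistically.
    func findConfig(explicit, standard string) (string, error) {
    	if explicit != "" {
    		if _, err := os.Stat(explicit); err != nil {
    			return "", fmt.Errorf("specified config %s: %v", explicit, err)
    		}
    		return explicit, nil
    	}
    	if _, err := os.Stat(standard); err == nil {
    		return standard, nil
    	}
    	return "", nil // no config found; caller skips these diagnostics
    }

    func main() {
    	path, err := findConfig("", "/etc/openshift/master/master-config.yaml")
    	fmt.Println(path, err)
    }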
new file mode 100644
... ...
@@ -0,0 +1,80 @@
0
+package discovery
1
+
2
+import (
3
+	mconfigapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest"
4
+	"github.com/openshift/origin/pkg/cmd/server/start"
5
+)
6
+
7
+const StandardNodeConfPath string = "/etc/openshift/node/node-config.yaml"
8
+
9
+func (env *Environment) DiscoverNode() {
10
+	// first, determine if we even have a node config
11
+	options := env.Options.NodeDiagOptions
12
+	if env.Options.NodeConfigPath != "" { // specified node conf, it has to load or we choke
13
+		options.NodeStartOptions.NodeArgs = start.NewDefaultNodeArgs() // and don't set any args
14
+		if env.tryNodeConfig(true) {
15
+			env.WillCheck[NodeTarget] = true
16
+		}
17
+	} else { // user did not indicate config file
18
+		env.Log.Debug("discNCnofile", "No node config file specified")
19
+		if !options.MustCheck {
20
+			// general command, user couldn't indicate server flags;
21
+			// look for node config in standard location(s)
22
+			env.tryStandardNodeConfig() // or give up.
23
+		} else { // assume user provided flags like actual node.
24
+			env.tryNodeConfig(true)
25
+			env.WillCheck[NodeTarget] = true // regardless
26
+		}
27
+	}
28
+	if !env.WillCheck[NodeTarget] {
29
+		env.Log.Notice("discNCnone", "No node config found; node diagnostics will not be performed.")
30
+	}
31
+}
32
+
33
+func (env *Environment) tryNodeConfig(errOnFail bool) bool /* worked */ {
34
+	options := env.Options.NodeDiagOptions.NodeStartOptions
35
+	//pretty.Println("nodeconfig options are:", options)
36
+	logOnFail := env.Log.Debugf
37
+	if errOnFail {
38
+		logOnFail = env.Log.Errorf
39
+	}
40
+	if err := options.Complete(); err != nil {
41
+		logOnFail("discNCstart", "Could not read node config options: (%T) %[1]v", err)
42
+		return false
43
+	} else if err = options.Validate([]string{}); err != nil {
44
+		logOnFail("discNCstart", "Could not read node config options: (%T) %[1]v", err)
45
+		return false
46
+	}
47
+	var err error
48
+	if path := options.ConfigFile; path != "" {
49
+		env.Log.Debugf("discNCfile", "Looking for node config file at '%s'", path)
50
+		if env.NodeConfig, err = mconfigapilatest.ReadAndResolveNodeConfig(path); err != nil {
51
+			logOnFail("discNCfail", "Could not read node config file '%s':\n(%T) %[2]v", path, err)
52
+			return false
53
+		}
54
+		env.Log.Infof("discNCfound", "Found a node config file:\n%[1]s", path)
55
+		return true
56
+	} else {
57
+		if env.NodeConfig, err = options.NodeArgs.BuildSerializeableNodeConfig(); err != nil {
58
+			logOnFail("discNCopts", "Could not build a node config from flags:\n(%T) %[1]v", err)
59
+			return false
60
+		}
61
+		env.Log.Infof("discNCfound", "No node config file, using any flags for configuration.")
62
+	}
63
+	return true
64
+}
65
+
66
+func (env *Environment) tryStandardNodeConfig() bool /*worked*/ {
67
+	env.Log.Debug("discNCnoflags", "No node config flags specified, will try standard config location")
68
+	options := env.Options.NodeDiagOptions.NodeStartOptions
69
+	options.ConfigFile = StandardNodeConfPath
70
+	options.NodeArgs = start.NewDefaultNodeArgs()
71
+	if env.tryNodeConfig(false) {
72
+		env.Log.Debug("discNCdefault", "Using node config file at "+StandardNodeConfPath)
73
+		env.WillCheck[NodeTarget] = true
74
+		return true
75
+	} else { // otherwise, we just don't do node diagnostics
76
+		env.Log.Debugf("discNCnone", "Not using node config file at "+StandardNodeConfPath+" - will not do node diagnostics.")
77
+	}
78
+	return false
79
+}
0 80
new file mode 100644
... ...
@@ -0,0 +1,19 @@
0
+package discovery
1
+
2
+import (
3
+	"os/exec"
4
+	"runtime"
5
+)
6
+
7
+// ----------------------------------------------------------
8
+// Determine what we need to know about the OS
9
+func (env *Environment) DiscoverOperatingSystem() {
10
+	if runtime.GOOS == "linux" {
11
+		if _, err := exec.LookPath("systemctl"); err == nil {
12
+			env.HasSystemd = true
13
+		}
14
+		if _, err := exec.LookPath("/bin/bash"); err == nil {
15
+			env.HasBash = true
16
+		}
17
+	}
18
+}
0 19
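DiscoverOperatingSystem treats a failed PATH lookup as "capability absent" rather than as an error. The same probing pattern in a self-contained program:

    package main

    import (
    	"fmt"
    	"os/exec"
    	"runtime"
    )

    func main() {
    	hasSystemd, hasBash := false, false
    	if runtime.GOOS == "linux" {
    		// LookPath errors just mean the capability is missing, so they
    		// are deliberately ignored here, as in the discovery code above.
    		if _, err := exec.LookPath("systemctl"); err == nil {
    			hasSystemd = true
    		}
    		if _, err := exec.LookPath("/bin/bash"); err == nil {
    			hasBash = true
    		}
    	}
    	fmt.Println("systemd:", hasSystemd, "bash:", hasBash)
    }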
new file mode 100644
... ...
@@ -0,0 +1,58 @@
0
+package discovery
1
+
2
+import (
3
+	"fmt"
4
+	"github.com/openshift/origin/pkg/diagnostics/log"
5
+	"github.com/openshift/origin/pkg/diagnostics/types"
6
+	"os/exec"
7
+	"strings"
8
+)
9
+
10
+// ----------------------------------------------------------
11
+// Determine what systemd units are relevant, if any
12
+// Run after determining whether systemd and openshift are present.
13
+func (env *Environment) DiscoverSystemd() {
14
+	env.Log.Notice("discBegin", "Beginning systemd discovery")
15
+	for _, name := range []string{"openshift", "openshift-master", "openshift-node", "openshift-sdn-master", "openshift-sdn-node", "docker", "openvswitch", "iptables", "etcd", "kubernetes"} {
16
+		if env.SystemdUnits[name] = discoverSystemdUnit(name, env.Log); env.SystemdUnits[name].Exists {
17
+			env.Log.Debugm("discUnit", log.Msg{"tmpl": "Saw systemd unit {{.unit}}", "unit": name})
18
+		}
19
+	}
20
+	env.Log.Debugf("discUnits", "%v", env.SystemdUnits)
21
+}
22
+
23
+func discoverSystemdUnit(name string, logger *log.Logger) types.SystemdUnit {
24
+	unit := types.SystemdUnit{Name: name, Exists: false}
25
+	if output, err := exec.Command("systemctl", "show", name).Output(); err != nil {
26
+		logger.Errorm("discCtlErr", log.Msg{"tmpl": "Error running `systemctl show {{.unit}}`: {{.error}}\nCannot analyze systemd units.", "unit": name, "error": err.Error()})
27
+	} else {
28
+		attr := make(map[string]string)
29
+		for _, line := range strings.Split(string(output), "\n") {
30
+			elements := strings.SplitN(line, "=", 2) // Looking for "Foo=Bar" settings
31
+			if len(elements) == 2 {                  // found that, record it...
32
+				attr[elements[0]] = elements[1]
33
+			}
34
+		}
35
+		if val := attr["LoadState"]; val != "loaded" {
36
+			logger.Debugm("discUnitENoExist", log.Msg{"tmpl": "systemd unit '{{.unit}}' does not exist. LoadState is '{{.state}}'", "unit": name, "state": val})
37
+			return unit // doesn't exist - leave everything blank
38
+		} else {
39
+			unit.Exists = true
40
+		}
41
+		if val := attr["UnitFileState"]; val == "enabled" {
42
+			logger.Debugm("discUnitEnabled", log.Msg{"tmpl": "systemd unit '{{.unit}}' is enabled - it will start automatically at boot.", "unit": name})
43
+			unit.Enabled = true
44
+		} else {
45
+			logger.Debugm("discUnitNoEnable", log.Msg{"tmpl": "systemd unit '{{.unit}}' is not enabled - it does not start automatically at boot. UnitFileState is '{{.state}}'", "unit": name, "state": val})
46
+		}
47
+		if val := attr["ActiveState"]; val == "active" {
48
+			logger.Debugm("discUnitActive", log.Msg{"tmpl": "systemd unit '{{.unit}}' is currently running", "unit": name})
49
+			unit.Active = true
50
+		} else {
51
+			logger.Debugm("discUnitNoActive", log.Msg{"unit": name, "state": val, "exit": unit.ExitStatus,
52
+				"tmpl": "systemd unit '{{.unit}}' is not currently running. ActiveState is '{{.state}}'; exit code was {{.exit}}."})
53
+		}
54
+		fmt.Sscanf(attr["StatusErrno"], "%d", &unit.ExitStatus) // ignore errors...
55
+	}
56
+	return unit
57
+}
0 58
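`systemctl show` emits one `Key=Value` pair per line, which the code above folds into a map before reading LoadState, UnitFileState, and ActiveState. The same parsing against a canned sample, without shelling out:

    package main

    import (
    	"fmt"
    	"strings"
    )

    func main() {
    	// Canned stand-in for `systemctl show <unit>` output.
    	output := "LoadState=loaded\nUnitFileState=enabled\nActiveState=active\nStatusErrno=0\n"
    	attr := make(map[string]string)
    	for _, line := range strings.Split(output, "\n") {
    		if kv := strings.SplitN(line, "=", 2); len(kv) == 2 {
    			attr[kv[0]] = kv[1]
    		}
    	}
    	exists := attr["LoadState"] == "loaded"
    	enabled := attr["UnitFileState"] == "enabled"
    	active := attr["ActiveState"] == "active"
    	fmt.Println("exists:", exists, "enabled:", enabled, "active:", active)
    }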
new file mode 100644
... ...
@@ -0,0 +1,33 @@
0
+package log
1
+
2
+import (
3
+	"encoding/json"
4
+	"fmt"
5
+	"io"
6
+)
7
+
8
+type jsonLogger struct {
9
+	out         io.Writer
10
+	logStarted  bool
11
+	logFinished bool
12
+}
13
+
14
+func (j *jsonLogger) Write(l Level, msg Msg) {
15
+	if j.logStarted {
16
+		fmt.Fprintln(j.out, ",")
17
+	} else {
18
+		fmt.Fprintln(j.out, "[")
19
+	}
20
+	j.logStarted = true
21
+	msg["level"] = l.Name
22
+	b, _ := json.MarshalIndent(msg, "  ", "  ")
23
+	fmt.Fprint(j.out, "  "+string(b))
24
+}
25
+func (j *jsonLogger) Finish() {
26
+	if j.logStarted {
27
+		fmt.Fprintln(j.out, "\n]")
28
+	} else if !j.logFinished {
29
+		fmt.Fprintln(j.out, "[]")
30
+	}
31
+	j.logFinished = true
32
+}
0 33
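The logger streams a JSON array without buffering the whole log: `[` before the first record, `,` before each later one, and `]` (or `[]` when nothing was logged) at Finish. A runnable sketch of that framing:

    package main

    import (
    	"encoding/json"
    	"fmt"
    	"os"
    )

    func main() {
    	started := false
    	records := []map[string]interface{}{
    		{"id": "a", "level": "info"},
    		{"id": "b", "level": "warn"},
    	}
    	for _, msg := range records {
    		if started {
    			fmt.Fprintln(os.Stdout, ",") // separator before every later record
    		} else {
    			fmt.Fprintln(os.Stdout, "[") // array opener before the first
    		}
    		started = true
    		b, _ := json.MarshalIndent(msg, "  ", "  ")
    		fmt.Fprint(os.Stdout, "  "+string(b))
    	}
    	if started {
    		fmt.Fprintln(os.Stdout, "\n]")
    	} else {
    		fmt.Fprintln(os.Stdout, "[]") // empty log is still valid JSON
    	}
    }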
new file mode 100644
... ...
@@ -0,0 +1,205 @@
0
+package log
1
+
2
+import (
3
+	"bytes"
4
+	"errors"
5
+	"fmt"
6
+	ct "github.com/daviddengcn/go-colortext"
7
+	"io"
8
+	"strings"
9
+	"text/template"
10
+)
11
+
12
+type Level struct {
13
+	Level  int
14
+	Name   string
15
+	Prefix string
16
+	Color  ct.Color
17
+	Bright bool
18
+}
19
+
20
+type Logger struct {
21
+	logger       loggerType
22
+	level        Level
23
+	warningsSeen int
24
+	errorsSeen   int
25
+}
26
+
27
+// Internal type to deal with different log formats
28
+type loggerType interface {
29
+	Write(Level, Msg)
30
+	Finish()
31
+}
32
+
33
+func NewLogger(setLevel int, setFormat string, out io.Writer) (*Logger, error) {
34
+
35
+	var logger loggerType
36
+	switch setFormat {
37
+	case "json":
38
+		logger = &jsonLogger{out: out}
39
+	case "yaml":
40
+		logger = &yamlLogger{out: out}
41
+	case "text":
42
+		logger = newTextLogger(out)
43
+	default:
44
+		return nil, errors.New("Output format must be one of: text, json, yaml")
45
+	}
46
+
47
+	var err error
48
+	level := DebugLevel
49
+	switch setLevel {
50
+	case 0:
51
+		level = ErrorLevel
52
+	case 1:
53
+		level = WarnLevel
54
+	case 2:
55
+		level = NoticeLevel
56
+	case 3:
57
+		level = InfoLevel
58
+	case 4:
59
+		// Debug, also default for invalid numbers below
60
+	default:
61
+		err = errors.New("Invalid diagnostic level; must be 0-4")
62
+	}
63
+	return &Logger{
64
+		logger: logger,
65
+		level:  level,
66
+	}, err
67
+}
68
+
69
+// a map message type to throw type safety and method signatures out the window:
70
+type Msg map[string]interface{}
71
+
72
+/* a Msg can be expected to have the following entries:
73
+ * "id": an identifier unique to the message being logged, intended for json/yaml output
74
+ *       so that automation can recognize specific messages without trying to parse them.
75
+ * "text": human-readable message text
76
+ * "tmpl": a template string as understood by text/template that can use any of the other
77
+ *         entries in this Msg as inputs. This is removed, evaluated, and the result is
78
+ *         placed in "text". If there is an error during evaluation, the error is placed
79
+ *         in "templateErr", the original id of the message is stored in "templateId",
80
+ *         and the Msg id is changed to "tmplErr". Of course, this should never happen
81
+ *         if there are no mistakes in the calling code.
82
+ */
83
+
84
+var (
85
+	ErrorLevel  = Level{0, "error", "ERROR: ", ct.Red, true}   // Something is definitely wrong
86
+	WarnLevel   = Level{1, "warn", "WARN:  ", ct.Yellow, true} // Likely to be an issue but maybe not
87
+	NoticeLevel = Level{2, "note", "[Note] ", ct.White, false} // Introductory / summary
88
+	InfoLevel   = Level{3, "info", "Info:  ", ct.None, false}  // Just informational
89
+	DebugLevel  = Level{4, "debug", "debug: ", ct.None, false} // Extra verbose
90
+)
91
+
92
+// Provide a summary at the end
93
+func (l *Logger) Summary() {
94
+	l.Notice("summary", "\nSummary of diagnostics execution:\n")
95
+	if l.warningsSeen > 0 {
96
+		l.Noticem("sumWarn", Msg{"tmpl": "Warnings seen: {{.num}}", "num": l.warningsSeen})
97
+	}
98
+	if l.errorsSeen > 0 {
99
+		l.Noticem("sumErr", Msg{"tmpl": "Errors seen: {{.num}}", "num": l.errorsSeen})
100
+	}
101
+	if l.warningsSeen == 0 && l.errorsSeen == 0 {
102
+		l.Notice("sumNone", "Completed with no errors or warnings seen.")
103
+	}
104
+}
105
+
106
+func (l *Logger) Log(level Level, id string, msg Msg) {
107
+	if level.Level > l.level.Level {
108
+		return
109
+	}
110
+	msg["id"] = id // TODO: use to retrieve template from elsewhere
111
+	// if given a template, convert it to text
112
+	if tmpl, exists := msg["tmpl"]; exists {
113
+		var buff bytes.Buffer
114
+		if tmplString, assertion := tmpl.(string); !assertion {
115
+			msg["templateErr"] = fmt.Sprintf("Invalid template type: %T", tmpl)
116
+			msg["templateId"] = id
117
+			msg["id"] = "tmplErr"
118
+		} else {
119
+			parsedTmpl, err := template.New(id).Parse(tmplString)
120
+			if err != nil {
121
+				msg["templateErr"] = err.Error()
122
+				msg["templateId"] = id
123
+				msg["id"] = "tmplErr"
124
+			} else if err = parsedTmpl.Execute(&buff, msg); err != nil {
125
+				msg["templateErr"] = err.Error()
126
+				msg["templateId"] = id
127
+				msg["id"] = "tmplErr"
128
+			} else {
129
+				msg["text"] = buff.String()
130
+				delete(msg, "tmpl")
131
+			}
132
+		}
133
+	}
134
+	if level.Level == ErrorLevel.Level {
135
+		l.errorsSeen += 1
136
+	} else if level.Level == WarnLevel.Level {
137
+		l.warningsSeen += 1
138
+	}
139
+	l.logger.Write(level, msg)
140
+}
141
+
142
+// Convenience functions
143
+func (l *Logger) Error(id string, text string) {
144
+	l.Log(ErrorLevel, id, Msg{"text": text})
145
+}
146
+func (l *Logger) Errorf(id string, msg string, a ...interface{}) {
147
+	l.Error(id, fmt.Sprintf(msg, a...))
148
+}
149
+func (l *Logger) Errorm(id string, msg Msg) {
150
+	l.Log(ErrorLevel, id, msg)
151
+}
152
+func (l *Logger) Warn(id string, text string) {
153
+	l.Log(WarnLevel, id, Msg{"text": text})
154
+}
155
+func (l *Logger) Warnf(id string, msg string, a ...interface{}) {
156
+	l.Warn(id, fmt.Sprintf(msg, a...))
157
+}
158
+func (l *Logger) Warnm(id string, msg Msg) {
159
+	l.Log(WarnLevel, id, msg)
160
+}
161
+func (l *Logger) Info(id string, text string) {
162
+	l.Log(InfoLevel, id, Msg{"text": text})
163
+}
164
+func (l *Logger) Infof(id string, msg string, a ...interface{}) {
165
+	l.Info(id, fmt.Sprintf(msg, a...))
166
+}
167
+func (l *Logger) Infom(id string, msg Msg) {
168
+	l.Log(InfoLevel, id, msg)
169
+}
170
+func (l *Logger) Notice(id string, text string) {
171
+	l.Log(NoticeLevel, id, Msg{"text": text})
172
+}
173
+func (l *Logger) Noticef(id string, msg string, a ...interface{}) {
174
+	l.Notice(id, fmt.Sprintf(msg, a...))
175
+}
176
+func (l *Logger) Noticem(id string, msg Msg) {
177
+	l.Log(NoticeLevel, id, msg)
178
+}
179
+func (l *Logger) Debug(id string, text string) {
180
+	l.Log(DebugLevel, id, Msg{"text": text})
181
+}
182
+func (l *Logger) Debugf(id string, msg string, a ...interface{}) {
183
+	l.Debug(id, fmt.Sprintf(msg, a...))
184
+}
185
+func (l *Logger) Debugm(id string, msg Msg) {
186
+	l.Log(DebugLevel, id, msg)
187
+}
188
+
189
+func (l *Logger) Finish() {
190
+	l.logger.Finish()
191
+}
192
+
193
+func (l *Logger) ErrorsSeen() bool {
194
+	return l.errorsSeen > 0
195
+}
196
+
197
+// turn excess lines into [...]
198
+func LimitLines(msg string, n int) string {
199
+	lines := strings.SplitN(msg, "\n", n+1)
200
+	if len(lines) == n+1 {
201
+		lines[n] = "[...]"
202
+	}
203
+	return strings.Join(lines, "\n")
204
+}
0 205
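The "tmpl" handling in Log boils down to parsing the template with the message id as its name, executing it against the Msg itself, and moving the result into "text". A standalone sketch of that happy path (error handling collapsed to panic for brevity):

    package main

    import (
    	"bytes"
    	"fmt"
    	"text/template"
    )

    func main() {
    	msg := map[string]interface{}{"tmpl": "Warnings seen: {{.num}}", "num": 3}
    	// Parse the template under the message id, as Log does above.
    	t, err := template.New("sumWarn").Parse(msg["tmpl"].(string))
    	if err != nil {
    		panic(err)
    	}
    	var buf bytes.Buffer
    	// The Msg map itself is the template's data, so any entry is usable.
    	if err := t.Execute(&buf, msg); err != nil {
    		panic(err)
    	}
    	msg["text"] = buf.String()
    	delete(msg, "tmpl")
    	fmt.Println(msg["text"]) // Warnings seen: 3
    }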
new file mode 100644
... ...
@@ -0,0 +1,53 @@
0
+package log
1
+
2
+import (
3
+	"fmt"
4
+	ct "github.com/daviddengcn/go-colortext"
5
+	"github.com/docker/docker/pkg/term"
6
+	"io"
7
+	"os"
8
+	"strings"
9
+)
10
+
11
+type textLogger struct {
12
+	out         io.Writer
13
+	ttyOutput   bool // usually want color; but do not output colors to non-tty
14
+	lastNewline bool // keep track of newline separation
15
+}
16
+
17
+func newTextLogger(out io.Writer) *textLogger {
18
+	logger := &textLogger{out: out, lastNewline: true}
19
+
20
+	if IsTerminal(out) {
21
+		// only want color sequences to humans, not redirected output (logs, "less", etc.)
22
+		logger.ttyOutput = true
23
+	}
24
+	return logger
25
+}
26
+
27
+// cribbed a la "github.com/openshift/origin/pkg/cmd/util"
28
+func IsTerminal(w io.Writer) bool {
29
+	file, ok := w.(*os.File)
30
+	return ok && term.IsTerminal(file.Fd())
31
+}
32
+
33
+func (t *textLogger) Write(l Level, msg Msg) {
34
+	if t.ttyOutput {
35
+		ct.ChangeColor(l.Color, l.Bright, ct.None, false)
36
+	}
37
+	text := strings.TrimSpace(fmt.Sprintf("%v", msg["text"]))
38
+	if strings.Contains(text, "\n") { // separate multiline comments with newlines
39
+		if !t.lastNewline {
40
+			fmt.Fprintln(t.out) // separate from previous one-line log msg
41
+		}
42
+		text = text + "\n"
43
+		t.lastNewline = true
44
+	} else {
45
+		t.lastNewline = false
46
+	}
47
+	fmt.Fprintln(t.out, l.Prefix+strings.Replace(text, "\n", "\n       ", -1))
48
+	if t.ttyOutput {
49
+		ct.ResetColor()
50
+	}
51
+}
52
+func (t *textLogger) Finish() {}
0 53
new file mode 100644
... ...
@@ -0,0 +1,19 @@
0
+package log
1
+
2
+import (
3
+	"fmt"
4
+	"gopkg.in/yaml.v2"
5
+	"io"
6
+)
7
+
8
+type yamlLogger struct {
9
+	out        io.Writer
10
+	logStarted bool
11
+}
12
+
13
+func (y *yamlLogger) Write(l Level, msg Msg) {
14
+	msg["level"] = l.Name
15
+	b, _ := yaml.Marshal(&msg)
16
+	fmt.Fprintln(y.out, "---\n"+string(b))
17
+}
18
+func (y *yamlLogger) Finish() {}
0 19
new file mode 100644
... ...
@@ -0,0 +1,129 @@
0
+package run
1
+
2
+import (
3
+	"github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options"
4
+	"github.com/openshift/origin/pkg/cmd/server/start"
5
+	"github.com/openshift/origin/pkg/diagnostics/client"
6
+	"github.com/openshift/origin/pkg/diagnostics/discovery"
7
+	"github.com/openshift/origin/pkg/diagnostics/log"
8
+	"github.com/openshift/origin/pkg/diagnostics/systemd"
9
+	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
10
+	"os"
11
+	"strings"
12
+)
13
+
14
+func Diagnose(opts *options.AllDiagnosticsOptions) {
15
+	// start output to a log
16
+	dopts := opts.DiagOptions
17
+	logger, _ := log.NewLogger(dopts.DiagLevel, dopts.DiagFormat, dopts.Output.Get())
18
+	// start discovery
19
+	if env := RunDiscovery(opts, logger); env != nil { // discovery result can veto continuing
20
+		allDiags := make(map[string]map[string]diagnostic.Diagnostic)
21
+		// now we will figure out what diagnostics to run based on discovery.
22
+		for area := range env.WillCheck {
23
+			switch area {
24
+			case discovery.ClientTarget:
25
+				allDiags["client"] = client.Diagnostics
26
+			case discovery.MasterTarget, discovery.NodeTarget:
27
+				allDiags["systemd"] = systemd.Diagnostics
28
+			}
29
+		}
30
+		if list := opts.DiagOptions.Diagnostics; len(*list) > 0 {
31
+			// just run a specific (set of) diagnostic(s)
32
+			for _, arg := range *list {
33
+				parts := strings.SplitN(arg, ".", 2)
34
+				if len(parts) < 2 {
35
+					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
36
+					continue
37
+				}
38
+				area, name := parts[0], parts[1]
39
+				if diagnostics, exists := allDiags[area]; !exists {
40
+					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
41
+				} else if diag, exists := diagnostics[name]; !exists {
42
+					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
43
+				} else {
44
+					RunDiagnostic(area, name, diag, env)
45
+				}
46
+			}
47
+		} else {
48
+			// TODO: run all of these in parallel but ensure sane output
49
+			for area, diagnostics := range allDiags {
50
+				for name, diag := range diagnostics {
51
+					RunDiagnostic(area, name, diag, env)
52
+				}
53
+			}
54
+		}
55
+	}
56
+	logger.Summary()
57
+	logger.Finish()
58
+	if logger.ErrorsSeen() {
59
+		os.Exit(255)
60
+	}
61
+}
62
+
63
+// ----------------------------------------------------------
64
+// Examine system and return findings in an Environment
65
+func RunDiscovery(adOpts *options.AllDiagnosticsOptions, logger *log.Logger) *discovery.Environment {
66
+	logger.Notice("discBegin", "Beginning discovery of environment")
67
+	env := discovery.NewEnvironment(adOpts, logger)
68
+	env.DiscoverOperatingSystem()
69
+	if adOpts.MasterDiagOptions != nil || adOpts.NodeDiagOptions != nil {
70
+		env.DiscoverSystemd()
71
+	}
72
+	if mdOpts := adOpts.MasterDiagOptions; mdOpts != nil {
73
+		if mdOpts.MasterStartOptions == nil {
74
+			mdOpts.MasterStartOptions = &start.MasterOptions{ConfigFile: adOpts.MasterConfigPath}
75
+			// leaving MasterArgs nil signals it has to be a master config file or nothing.
76
+		} else if adOpts.MasterConfigPath != "" {
77
+			mdOpts.MasterStartOptions.ConfigFile = adOpts.MasterConfigPath
78
+		}
79
+		env.DiscoverMaster()
80
+	}
81
+	if ndOpts := adOpts.NodeDiagOptions; ndOpts != nil {
82
+		if ndOpts.NodeStartOptions == nil {
83
+			ndOpts.NodeStartOptions = &start.NodeOptions{ConfigFile: adOpts.NodeConfigPath}
84
+			// no NodeArgs signals it has to be a node config file or nothing.
85
+		} else if adOpts.NodeConfigPath != "" {
86
+			ndOpts.NodeStartOptions.ConfigFile = adOpts.NodeConfigPath
87
+		}
88
+		env.DiscoverNode()
89
+	}
90
+	if cdOpts := adOpts.ClientDiagOptions; cdOpts != nil {
91
+		env.DiscoverClient()
92
+		env.ReadClientConfigFiles() // so user knows where config is coming from (or not)
93
+		env.ConfigClient()
94
+	}
95
+	checkAny := false
96
+	for _, check := range env.WillCheck {
97
+		checkAny = checkAny || check
98
+	}
99
+	if !checkAny {
100
+		logger.Error("discNoChecks", "Cannot find any OpenShift configuration. Please specify which component or configuration you wish to troubleshoot.")
101
+		return nil
102
+	}
103
+	return env
104
+}
105
+
106
+func RunDiagnostic(area string, name string, diag diagnostic.Diagnostic, env *discovery.Environment) {
107
+	defer func() {
108
+		// recover from diagnostics that panic so others can still run
109
+		if r := recover(); r != nil {
110
+			env.Log.Errorf("diagPanic", "Diagnostic '%s' crashed; this is usually a bug in either diagnostics or OpenShift. Stack trace:\n%+v", name, r)
111
+		}
112
+	}()
113
+	if diag.Condition != nil {
114
+		if skip, reason := diag.Condition(env); skip {
115
+			if reason == "" {
116
+				env.Log.Noticem("diagSkip", log.Msg{"area": area, "name": name, "diag": diag.Description,
117
+					"tmpl": "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}"})
118
+			} else {
119
+				env.Log.Noticem("diagSkip", log.Msg{"area": area, "name": name, "diag": diag.Description, "reason": reason,
120
+					"tmpl": "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}"})
121
+			}
122
+			return
123
+		}
124
+	}
125
+	env.Log.Noticem("diagRun", log.Msg{"area": area, "name": name, "diag": diag.Description,
126
+		"tmpl": "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}"})
127
+	diag.Run(env)
128
+}
0 129
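Requested diagnostics are addressed as `area.name`, split once on the first dot and resolved through a two-level map; anything unresolvable is reported and skipped. A self-contained sketch of that lookup (the map contents here are placeholders, not the real diagnostic registry):

    package main

    import (
    	"fmt"
    	"strings"
    )

    func main() {
    	all := map[string]map[string]string{
    		"systemd": {"UnitStatus": "Check status for OpenShift-related systemd units"},
    	}
    	for _, arg := range []string{"systemd.UnitStatus", "bogus"} {
    		parts := strings.SplitN(arg, ".", 2)
    		if len(parts) < 2 {
    			fmt.Printf("There is no such diagnostic %q\n", arg)
    			continue
    		}
    		// Indexing a missing area yields a nil map, which indexes safely.
    		if desc, ok := all[parts[0]][parts[1]]; ok {
    			fmt.Printf("Running %s: %s\n", arg, desc)
    		} else {
    			fmt.Printf("There is no such diagnostic %q\n", arg)
    		}
    	}
    }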
new file mode 100644
... ...
@@ -0,0 +1,514 @@
0
+package systemd
1
+
2
+import (
3
+	"bufio"
4
+	"encoding/json"
5
+	"fmt"
6
+	"github.com/openshift/origin/pkg/diagnostics/discovery"
7
+	"github.com/openshift/origin/pkg/diagnostics/log"
8
+	"github.com/openshift/origin/pkg/diagnostics/types"
9
+	"github.com/openshift/origin/pkg/diagnostics/types/diagnostic"
10
+	"io"
11
+	"os/exec"
12
+	"regexp"
13
+)
14
+
15
+type logEntry struct {
16
+	Message string // I feel certain we will want more fields at some point
17
+}
18
+
19
+type logMatcher struct { // regex for scanning log messages and interpreting them when found
20
+	Regexp         *regexp.Regexp
21
+	Level          log.Level
22
+	Id             string
23
+	Interpretation string // log with above level+id if it's simple
24
+	KeepAfterMatch bool   // usually note only first matched entry, ignore rest
25
+	Interpret      func(  // run this for custom logic on match
26
+		env *discovery.Environment,
27
+		entry *logEntry,
28
+		matches []string,
29
+	) bool // KeepAfterMatch?
30
+}
31
+
32
+type unitSpec struct {
33
+	Name        string
34
+	StartMatch  *regexp.Regexp // regex to look for in log messages indicating startup
35
+	LogMatchers []logMatcher   // suspect log patterns to check for - checked in order
36
+}
37
+
38
+//
39
+// -------- Things that feed into the diagnostics definitions -----------
40
+// Search for Diagnostics for the actual diagnostics.
41
+
42
+// Reusable log matchers:
43
+var badImageTemplate = logMatcher{
44
+	Regexp: regexp.MustCompile(`Unable to find an image for .* due to an error processing the format: %!v\(MISSING\)`),
45
+	Level:  log.InfoLevel,
46
+	Interpretation: `
47
+This error indicates that openshift was given the --images flag with an invalid format variable.
48
+Valid formats can include (literally) ${component} and ${version}.
49
+This could be a typo or you might be intending to hardcode something,
50
+such as a version which should be specified as e.g. v3.0, not ${v3.0}.
51
+Note that the --images flag may be supplied via the OpenShift master,
52
+node, or "openshift ex registry/router" invocations and should usually
53
+be the same for each.`,
54
+}
55
+
56
+// captures for logMatcher Interpret functions to store state between matches
57
+var tlsClientErrorSeen map[string]bool
58
+
59
+// Specify what units we can check and what to look for and say about it
60
+var unitLogSpecs = []*unitSpec{
61
+	{
62
+		Name:       "openshift-master",
63
+		StartMatch: regexp.MustCompile("Starting an OpenShift master"),
64
+		LogMatchers: []logMatcher{
65
+			badImageTemplate,
66
+			{
67
+				Regexp:         regexp.MustCompile("Unable to decode an event from the watch stream: local error: unexpected message"),
68
+				Level:          log.InfoLevel,
69
+				Id:             "sdLogOMIgnore",
70
+				Interpretation: "You can safely ignore this message.",
71
+			},
72
+			{
73
+				Regexp: regexp.MustCompile("HTTP probe error: Get .*/healthz: dial tcp .*:10250: connection refused"),
74
+				Level:  log.InfoLevel,
75
+				Id:     "sdLogOMhzRef",
76
+				Interpretation: `
77
+The OpenShift master does a health check on nodes that are defined in
78
+its records, and this is the result when the node is not available yet.
79
+Since the master records are typically created before the node is
80
+available, this is not usually a problem, unless it continues in the
81
+logs after the node is actually available.`,
82
+			},
83
+			{
84
+				// TODO: don't rely on ipv4 format, should be ipv6 "soon"
85
+				Regexp: regexp.MustCompile("http: TLS handshake error from ([\\d.]+):\\d+: remote error: bad certificate"),
86
+				Level:  log.WarnLevel,
87
+				Interpret: func(env *discovery.Environment, entry *logEntry, matches []string) bool {
88
+					client := matches[1]
89
+					prelude := fmt.Sprintf("Found 'openshift-master' journald log message:\n  %s\n", entry.Message)
90
+					if tlsClientErrorSeen == nil { // first time this message was seen
91
+						tlsClientErrorSeen = map[string]bool{client: true}
92
+						// TODO: too generic, adjust message depending on subnet of the "from" address
93
+						env.Log.Warnm("sdLogOMreBadCert", log.Msg{"client": client, "text": prelude + `
94
+This error indicates that a client attempted to connect to the master
95
+HTTPS API server but broke off the connection because the master's
96
+certificate is not validated by a certificate authority (CA) acceptable
97
+to the client. There are a number of ways this can occur, some more
98
+problematic than others.
99
+
100
+At this time, the OpenShift master certificate is signed by a private CA
101
+(created the first time the master runs) and clients should have a copy of
102
+that CA certificate in order to validate connections to the master. Most
103
+likely, either:
104
+1. the master has generated a new CA (after the administrator deleted
105
+   the old one) and the client has a copy of the old CA cert, or
106
+2. the client hasn't been configured with a private CA at all (or the
107
+   wrong one), or
108
+3. the client is attempting to reach the master at a URL that isn't
109
+   covered by the master's server certificate, e.g. a public-facing
110
+   name or IP that isn't known to the master automatically; this may
111
+   need to be specified with the --public-master flag on the master
112
+   in order to generate a new server certificate including it.
113
+
114
+Clients of the master may include users, nodes, and infrastructure
115
+components running as containers. Check the "from" IP address in the
116
+log message:
117
+* If it is from a SDN IP, it is likely from an infrastructure
118
+  component. Check pod logs and recreate it with the correct CA cert.
119
+  Routers and registries won't work properly with the wrong CA.
120
+* If it is from a node IP, the client is likely a node. Check the
121
+  openshift-node and openshift-sdn-node logs and reconfigure with the
122
+  correct CA cert. Nodes will be unable to create pods until this is
123
+  corrected.
124
+* If it is from an external IP, it is likely from a user (CLI, browser,
125
+  etc.). osc and openshift clients should be configured with the correct
126
+  CA cert; browsers can also add CA certs but it is usually easier
127
+  to just have them accept the server certificate on the first visit
128
+  (so this message may simply indicate that the master generated a new
129
+  server certificate, e.g. to add a different --public-master, and a
130
+  browser hasn't accepted it yet and is still attempting API calls;
131
+  try logging out of the console and back in again).`})
132
+					} else if !tlsClientErrorSeen[client] {
133
+						tlsClientErrorSeen[client] = true
134
+						env.Log.Warnm("sdLogOMreBadCert", log.Msg{"client": client, "text": prelude +
135
+							`This message was diagnosed above, but for a different client address.`})
136
+					} // else, it's a repeat, don't mention it
137
+					return true // show once for every client failing to connect, not just the first
138
+				},
139
+			},
140
+			{
141
+				// user &{system:anonymous  [system:unauthenticated]} -> /api/v1beta1/services?namespace="
142
+				Regexp: regexp.MustCompile("system:anonymous\\W*system:unauthenticated\\W*/api/v1beta1/services\\?namespace="),
143
+				Level:  log.WarnLevel,
144
+				Id:     "sdLogOMunauthNode",
145
+				Interpretation: `
146
+This indicates the OpenShift API server (master) received an unscoped
147
+request to get Services. Requests like this probably come from an
148
+OpenShift node trying to discover where it should proxy services.
149
+
150
+However, the request was unauthenticated, so it was denied. The node
151
+either did not offer a client certificate for credential, or offered an
152
+invalid one (not signed by the certificate authority the master uses).
153
+The node will not be able to function without this access.
154
+
155
+Unfortunately, this message does not tell us *which* node is the
156
+problem. But running diagnostics on your node hosts should find a log
157
+message for any node with this problem.
158
+`,
159
+			},
160
+		},
161
+	},
162
+	{
163
+		Name:        "openshift-sdn-master",
164
+		StartMatch:  regexp.MustCompile("Starting OpenShift SDN Master"),
165
+		LogMatchers: []logMatcher{},
166
+	},
167
+	{
168
+		Name:       "openshift-node",
169
+		StartMatch: regexp.MustCompile("Starting an OpenShift node"),
170
+		LogMatchers: []logMatcher{
171
+			badImageTemplate,
172
+			{
173
+				Regexp: regexp.MustCompile(`error updating node status, will retry:.*system:(\S+) cannot get on minions with name "(\S+)" in default|Failed to list .*Forbidden: "\S+" system:node-\S+ cannot list on (pods|services) in`),
174
+				Level:  log.ErrorLevel,
175
+				Id:     "sdLogONnodePerm",
176
+				Interpretation: `
177
+openshift-node lacks the permission to update the node's status or request
178
+its responsibilities from the OpenShift master API. This host will not
179
+function as a node until this is resolved. Pods scheduled for this node
180
+will remain in pending or unknown state forever.
181
+
182
+This probably indicates a problem with policy as node credentials in beta3
183
+allow access to anything (later, they will be constrained only to pods
184
+that belong to them). This message indicates that the node credentials
185
+are authenticated, but not authorized for the necessary access.
186
+
187
+One way to encounter this is to start the master with data from an older
188
+installation (e.g. beta2) in etcd. The default startup will not update
189
+existing policy to allow node access as it would have when starting with
190
+an empty etcd. In this case, the following command (as admin):
191
+
192
+    osc get rolebindings -n master
193
+
194
+... should show group system:nodes has the master/system:component role.
195
+If that is missing, you may wish to rewrite the bootstrap policy with:
196
+
197
+    POLICY=/var/lib/openshift/openshift.local.policy/policy.json
198
+    CONF=/etc/openshift/master.yaml
199
+    openshift admin overwrite-policy --filename=$POLICY --master-config=$CONF
200
+
201
+If that is not the problem, then it may be that access controls on nodes
202
+have been put in place and are blocking this request; check the error
203
+message to see whether the node is attempting to use the wrong node name.
204
+`,
205
+			},
206
+			{
207
+				Regexp: regexp.MustCompile("Unable to load services: Get (http\\S+/api/v1beta1/services\\?namespace=): (.+)"), // e.g. x509: certificate signed by unknown authority
208
+				Level:  log.ErrorLevel,
209
+				Id:     "sdLogONconnMaster",
210
+				Interpretation: `
211
+openshift-node could not connect to the OpenShift master API in order
212
+to determine its responsibilities. This host will not function as a node
213
+until this is resolved. Pods scheduled for this node will remain in
214
+pending or unknown state forever.`,
215
+			},
216
+			{
217
+				Regexp: regexp.MustCompile(`Unable to load services: request.*403 Forbidden: Forbidden: "/api/v1beta1/services\?namespace=" denied by default`),
218
+				Level:  log.ErrorLevel,
219
+				Id:     "sdLogONMasterForbids",
220
+				Interpretation: `
221
+openshift-node could not connect to the OpenShift master API to determine
222
+its responsibilities because it lacks the proper credentials. Nodes
223
+should specify a client certificate in order to identify themselves to
224
+the master. This message typically means that either no client key/cert
225
+was supplied, or it is not validated by the certificate authority (CA)
226
+the master uses. You should supply a correct client key and certificate
227
+to the .kubeconfig specified in /etc/sysconfig/openshift-node
228
+
229
+This host will not function as a node until this is resolved. Pods
230
+scheduled for this node will remain in pending or unknown state forever.`,
231
+			},
232
+		},
233
+	},
234
+	{
235
+		Name:       "openshift-sdn-node",
236
+		StartMatch: regexp.MustCompile("Starting OpenShift SDN node"),
237
+		LogMatchers: []logMatcher{
238
+			{
239
+				Regexp: regexp.MustCompile("Could not find an allocated subnet for this minion.*Waiting.."),
240
+				Level:  log.WarnLevel,
241
+				Id:     "sdLogOSNnoSubnet",
242
+				Interpretation: `
243
+This warning occurs when openshift-sdn-node is trying to request the
244
+SDN subnet it should be configured with according to openshift-sdn-master,
245
+but either can't connect to it ("All the given peers are not reachable")
246
+or has not yet been assigned a subnet ("Key not found").
247
+
248
+This can just be a matter of waiting for the master to become fully
249
+available and define a record for the node (aka "minion") to use,
250
+and openshift-sdn-node will wait until that occurs, so the presence
251
+of this message in the node log isn't necessarily a problem as
252
+long as the SDN is actually working, but this message may help indicate
253
+the problem if it is not working.
254
+
255
+If the master is available and this node's record is defined and this
256
+message persists, then it may be a sign of a different misconfiguration.
257
+Unfortunately the message is not specific about why the connection failed.
258
+Check MASTER_URL in /etc/sysconfig/openshift-sdn-node:
259
+ * Is the protocol https? It should be http.
260
+ * Can you reach the address and port from the node using curl?
261
+   ("404 page not found" is correct response)`,
262
+			},
263
+		},
264
+	},
265
+	{
266
+		Name:       "docker",
267
+		StartMatch: regexp.MustCompile(`Starting Docker Application Container Engine.`), // RHEL Docker at least
268
+		LogMatchers: []logMatcher{
269
+			{
270
+				Regexp: regexp.MustCompile(`Usage: docker \[OPTIONS\] COMMAND`),
271
+				Level:  log.ErrorLevel,
272
+				Id:     "sdLogDbadOpt",
273
+				Interpretation: `
274
+This indicates that docker failed to parse its command line
275
+successfully, so it just printed a standard usage message and exited.
276
+Its command line is built from variables in /etc/sysconfig/docker
277
+(which may be overridden by variables in /etc/sysconfig/openshift-sdn-node)
278
+so check there for problems.
279
+
280
+The OpenShift node will not work on this host until this is resolved.`,
281
+			},
282
+			{
283
+				Regexp: regexp.MustCompile(`^Unable to open the database file: unable to open database file$`),
284
+				Level:  log.ErrorLevel,
285
+				Id:     "sdLogDopenDB",
286
+				Interpretation: `
287
+This indicates that docker failed to record its state to its database.
288
+The most likely reason is that it is out of disk space. It is also
289
+possible for other device or permissions problems to be at fault.
290
+
291
+Sometimes this is due to excess completed containers not being cleaned
292
+up. You can delete all completed containers with this command (running
293
+containers will not be deleted):
294
+
295
+  # docker rm $(docker ps -qa)
296
+
297
+Whatever the reason, docker will not function in this state.
298
+The OpenShift node will not work on this host until this is resolved.`,
299
+			},
300
+			{
301
+				Regexp: regexp.MustCompile(`no space left on device$`),
302
+				Level:  log.ErrorLevel,
303
+				Id:     "sdLogDfull",
304
+				Interpretation: `
305
+This indicates that docker has run out of space for container volumes
306
+or metadata (by default, stored in /var/lib/docker, but configurable).
307
+
308
+docker will not function in this state. It requires that disk space be
309
+added to the relevant filesystem or files deleted to make space.
310
+Sometimes this is due to excess completed containers not being cleaned
311
+up. You can delete all completed containers with this command (running
312
+containers will not be deleted):
313
+
314
+  # docker rm $(docker ps -qa)
315
+
316
+The OpenShift node will not work on this host until this is resolved.`,
317
+			},
318
+			{ // generic error seen - do this last
319
+				Regexp: regexp.MustCompile(`\slevel="fatal"\s`),
320
+				Level:  log.ErrorLevel,
321
+				Id:     "sdLogDfatal",
322
+				Interpretation: `
323
+This is not a known problem, but it is causing Docker to crash,
324
+so the OpenShift node will not work on this host until it is resolved.`,
325
+			},
326
+		},
327
+	},
328
+	{
329
+		Name:        "openvswitch",
330
+		StartMatch:  regexp.MustCompile("Starting Open vSwitch"),
331
+		LogMatchers: []logMatcher{},
332
+	},
333
+}
334
+
335
+var systemdRelevant = func(env *discovery.Environment) (skip bool, reason string) {
336
+	if !env.HasSystemd {
337
+		return true, "systemd is not present on this host"
338
+	}
339
+	return false, ""
340
+}
341
+
342
+//
343
+// -------- The actual diagnostics definitions -----------
344
+//
345
+
346
+var Diagnostics = map[string]diagnostic.Diagnostic{
347
+
348
+	"AnalyzeLogs": {
349
+		Description: "Check for problems in systemd service logs since each service last started",
350
+		Condition:   systemdRelevant,
351
+		Run: func(env *discovery.Environment) {
352
+			for _, unit := range unitLogSpecs {
353
+				if svc := env.SystemdUnits[unit.Name]; svc.Enabled || svc.Active {
354
+					env.Log.Infom("sdCheckLogs", log.Msg{"tmpl": "Checking journalctl logs for '{{.name}}' service", "name": unit.Name})
355
+					matchLogsSinceLastStart(unit, env)
356
+				}
357
+			}
358
+		},
359
+	},
360
+
361
+	"UnitStatus": {
362
+		Description: "Check status for OpenShift-related systemd units",
363
+		Condition:   systemdRelevant,
364
+		Run: func(env *discovery.Environment) {
365
+			u := env.SystemdUnits
366
+			unitRequiresUnit(env.Log, u["openshift-node"], u["iptables"], `
367
+iptables is used by OpenShift nodes for container networking.
368
+Connections to a container will fail without it.`)
369
+			unitRequiresUnit(env.Log, u["openshift-node"], u["docker"], `OpenShift nodes use Docker to run containers.`)
370
+			unitRequiresUnit(env.Log, u["openshift"], u["docker"], `OpenShift nodes use Docker to run containers.`)
371
+			// node's dependency on openvswitch is a special case.
372
+			// We do not need to enable ovs because openshift-node starts it for us.
373
+			if u["openshift-node"].Active && !u["openvswitch"].Active {
374
+				env.Log.Error("sdUnitSDNreqOVS", `
375
+systemd unit openshift-node is running but openvswitch is not.
376
+Normally openshift-node starts openvswitch once initialized.
377
+It is likely that openvswitch has crashed or been stopped.
378
+
379
+The software-defined network (SDN) enables networking between
380
+containers on different nodes. Containers will not be able to
381
+connect to each other without the openvswitch service carrying
382
+this traffic.
383
+
384
+An administrator can start openvswitch with:
385
+
386
+  # systemctl start openvswitch
387
+
388
+To ensure it is not repeatedly failing to run, check the status and logs with:
389
+
390
+  # systemctl status openvswitch
391
+  # journalctl -ru openvswitch `)
392
+			}
393
+			// Anything that is enabled but not running deserves notice
394
+			for name, unit := range u {
395
+				if unit.Enabled && !unit.Active {
396
+					env.Log.Errorm("sdUnitInactive", log.Msg{"tmpl": `
397
+The {{.unit}} systemd unit is intended to start at boot but is not currently active.
398
+An administrator can start the {{.unit}} unit with:
399
+
400
+  # systemctl start {{.unit}}
401
+
402
+To ensure it is not failing to run, check the status and logs with:
403
+
404
+  # systemctl status {{.unit}}
405
+  # journalctl -ru {{.unit}}`, "unit": name})
406
+				}
407
+			}
408
+		},
409
+	},
410
+}
411
+
412
+//
413
+// -------- Functions used by the diagnostics -----------
414
+//
415
+
416
+func unitRequiresUnit(logger *log.Logger, unit types.SystemdUnit, requires types.SystemdUnit, reason string) {
417
+	if (unit.Active || unit.Enabled) && !requires.Exists {
418
+		logger.Errorm("sdUnitReqLoaded", log.Msg{"tmpl": `
419
+systemd unit {{.unit}} depends on unit {{.required}}, which is not loaded.
420
+{{.reason}}
421
+An administrator probably needs to install the {{.required}} unit with:
422
+
423
+  # yum install {{.required}}
424
+
425
+If it is already installed, you may need to reload the definition with:
426
+
427
+  # systemctl reload {{.required}}
428
+  `, "unit": unit.Name, "required": requires.Name, "reason": reason})
429
+	} else if unit.Active && !requires.Active {
430
+		logger.Errorm("sdUnitReqActive", log.Msg{"tmpl": `
431
+systemd unit {{.unit}} is running but {{.required}} is not.
432
+{{.reason}}
433
+An administrator can start the {{.required}} unit with:
434
+
435
+  # systemctl start {{.required}}
436
+
437
+To ensure it is not failing to run, check the status and logs with:
438
+
439
+  # systemctl status {{.required}}
440
+  # journalctl -ru {{.required}}
441
+  `, "unit": unit.Name, "required": requires.Name, "reason": reason})
442
+	} else if unit.Enabled && !requires.Enabled {
443
+		logger.Warnm("sdUnitReqEnabled", log.Msg{"tmpl": `
444
+systemd unit {{.unit}} is enabled to run automatically at boot, but {{.required}} is not.
445
+{{.reason}}
446
+An administrator can enable the {{.required}} unit with:
447
+
448
+  # systemctl enable {{.required}}
449
+  `, "unit": unit.Name, "required": requires.Name, "reason": reason})
450
+	}
451
+}
452
+
453
+func matchLogsSinceLastStart(unit *unitSpec, env *discovery.Environment) {
454
+	cmd := exec.Command("journalctl", "-ru", unit.Name, "--output=json")
455
+	// JSON comes out of journalctl one line per record
456
+	lineReader, reader, err := func(cmd *exec.Cmd) (*bufio.Scanner, io.ReadCloser, error) {
457
+		stdout, err := cmd.StdoutPipe()
458
+		if err == nil {
459
+			lineReader := bufio.NewScanner(stdout)
460
+			if err = cmd.Start(); err == nil {
461
+				return lineReader, stdout, nil
462
+			}
463
+		}
464
+		return nil, nil, err
465
+	}(cmd)
466
+	if err != nil {
467
+		env.Log.Errorm("sdLogReadErr", log.Msg{"tmpl": `
468
+Diagnostics failed to query journalctl for the '{{.unit}}' unit logs.
469
+This should be very unusual, so please report this error:
470
+{{.error}}`, "unit": unit.Name, "error": errStr(err)})
471
+		return
472
+	}
473
+	defer func() { // close out pipe once done reading
474
+		reader.Close()
475
+		cmd.Wait()
476
+	}()
477
+	entryTemplate := logEntry{Message: `json:"MESSAGE"`}
478
+	matchCopy := append([]logMatcher(nil), unit.LogMatchers...) // make a copy, will remove matchers after they match something
479
+	for lineReader.Scan() {                                     // each log entry is a line
480
+		if len(matchCopy) == 0 { // if no rules remain to match
481
+			break // don't waste time reading more log entries
482
+		}
483
+		bytes, entry := lineReader.Bytes(), entryTemplate
484
+		if err := json.Unmarshal(bytes, &entry); err != nil {
485
+			env.Log.Debugm("sdLogBadJSON", log.Msg{"message": string(bytes), "error": errStr(err),
486
+				"tmpl": "Couldn't read the JSON for this log message:\n{{.message}}\nGot error {{.error}}"})
487
+		} else {
488
+			if unit.StartMatch.MatchString(entry.Message) {
489
+				break // saw the log message where the unit started; done looking.
490
+			}
491
+			for index, match := range matchCopy { // match log message against provided matchers
492
+				if strings := match.Regexp.FindStringSubmatch(entry.Message); strings != nil {
493
+					// if matches: print interpretation, remove from matchCopy, and go on to next log entry
494
+					keep := match.KeepAfterMatch
495
+					if match.Interpret != nil {
496
+						keep = match.Interpret(env, &entry, strings)
497
+					} else {
498
+						prelude := fmt.Sprintf("Found '%s' journald log message:\n  %s\n", unit.Name, entry.Message)
499
+						env.Log.Log(match.Level, match.Id, log.Msg{"text": prelude + match.Interpretation, "unit": unit.Name, "logMsg": entry.Message})
500
+					}
501
+					if !keep { // remove matcher once seen
502
+						matchCopy = append(matchCopy[:index], matchCopy[index+1:]...)
503
+					}
504
+					break
505
+				}
506
+			}
507
+		}
508
+	}
509
+}
510
+
511
+func errStr(err error) string {
512
+	return fmt.Sprintf("(%T) %[1]v", err)
513
+}
0 514
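matchLogsSinceLastStart relies on `journalctl -r` emitting the newest entries first, so the scan stops as soon as the unit's startup message appears. A standalone sketch of the scan loop, fed canned journalctl-style JSON lines instead of a real pipe:

    package main

    import (
    	"bufio"
    	"encoding/json"
    	"fmt"
    	"regexp"
    	"strings"
    )

    type logEntry struct {
    	Message string `json:"MESSAGE"` // explicit tag; case-insensitive matching would also work
    }

    func main() {
    	// Newest first, as journalctl -r would produce.
    	input := `{"MESSAGE":"no space left on device"}
    {"MESSAGE":"Starting Docker Application Container Engine."}`
    	full := regexp.MustCompile(`no space left on device$`)
    	start := regexp.MustCompile(`Starting Docker Application Container Engine.`)

    	scanner := bufio.NewScanner(strings.NewReader(input))
    	for scanner.Scan() {
    		var entry logEntry
    		if err := json.Unmarshal(scanner.Bytes(), &entry); err != nil {
    			continue // skip unreadable records, as the code above logs and moves on
    		}
    		if start.MatchString(entry.Message) {
    			break // reached the unit's startup message; done looking
    		}
    		if full.MatchString(entry.Message) {
    			fmt.Println("sdLogDfull:", entry.Message)
    		}
    	}
    }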
new file mode 100644
... ...
@@ -0,0 +1,16 @@
0
+package diagnostic
1
+
2
+// This needed to be separate from other types to avoid import cycle
3
+// diagnostic -> discovery -> types
4
+
5
+import (
6
+	"github.com/openshift/origin/pkg/diagnostics/discovery"
7
+)
8
+
9
+type DiagnosticCondition func(env *discovery.Environment) (skip bool, reason string)
10
+
11
+type Diagnostic struct {
12
+	Description string
13
+	Condition   DiagnosticCondition
14
+	Run         func(env *discovery.Environment)
15
+}
0 16
new file mode 100644
... ...
@@ -0,0 +1,9 @@
0
+package types
1
+
2
+type SystemdUnit struct {
3
+	Name       string
4
+	Exists     bool
5
+	Enabled    bool
6
+	Active     bool
7
+	ExitStatus int
8
+}
0 9
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+package types
1
+
2
+import "fmt"
3
+
4
+type Version struct {
5
+	X, Y, Z int
6
+}
7
+
8
+func (a Version) Eq(b Version) bool {
9
+	return a.X == b.X && a.Y == b.Y && a.Z == b.Z
10
+}
11
+
12
+func (a Version) Gt(b Version) bool {
13
+	if a.X > b.X {
14
+		return true
15
+	}
16
+	if a.X < b.X {
17
+		return false
18
+	} // so, Xs are equal
19
+	if a.Y > b.Y {
20
+		return true
21
+	}
22
+	if a.Y < b.Y {
23
+		return false
24
+	} // so, Ys are equal
25
+	if a.Z > b.Z {
26
+		return true
27
+	}
28
+	return false
29
+}
30
+
31
+func (v Version) GoString() string {
32
+	return fmt.Sprintf("%d.%d.%d", v.X, v.Y, v.Z)
33
+}
34
+
35
+func (v Version) NonZero() bool {
36
+	return !v.Eq(Version{0, 0, 0})
37
+}
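Gt is an ordinary lexicographic comparison on (X, Y, Z). An equivalent condensed form with a usage example:

    package main

    import "fmt"

    type Version struct{ X, Y, Z int }

    // Gt compares component by component, falling through on equality.
    func (a Version) Gt(b Version) bool {
    	if a.X != b.X {
    		return a.X > b.X
    	}
    	if a.Y != b.Y {
    		return a.Y > b.Y
    	}
    	return a.Z > b.Z
    }

    func main() {
    	fmt.Println(Version{1, 0, 3}.Gt(Version{0, 4, 4})) // true
    	fmt.Println(Version{0, 4, 4}.Gt(Version{0, 4, 4})) // false
    }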