17 | 16 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,194 @@ |
0 |
+package cmd |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
5 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
6 |
+ "github.com/openshift/origin/pkg/cmd/templates" |
|
7 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
8 |
+ "github.com/openshift/origin/pkg/diagnostics/run" |
|
9 |
+ "github.com/spf13/cobra" |
|
10 |
+ "io" |
|
11 |
+) |
|
12 |
+ |
|
13 |
+const longAllDescription = ` |
|
14 |
+OpenShift Diagnostics |
|
15 |
+ |
|
16 |
+This command helps you understand and troubleshoot OpenShift. It is |
|
17 |
+intended to be run from the same context as an OpenShift client or running |
|
18 |
+master / node in order to troubleshoot from the perspective of each. |
|
19 |
+ |
|
20 |
+ $ %[1]s |
|
21 |
+ |
|
22 |
+If run without flags or subcommands, it will check for config files for |
|
23 |
+client, master, and node, and if found, use them for troubleshooting |
|
24 |
+those components. If master/node config files are not found, the tool |
|
25 |
+assumes they are not present and does diagnostics only as a client. |
|
26 |
+ |
|
27 |
+You may also specify config files explicitly with flags below, in which |
|
28 |
+case you will receive an error if they are invalid or not found. |
|
29 |
+ |
|
30 |
+ $ %[1]s --master-config=/etc/openshift/master/master-config.yaml |
|
31 |
+ |
|
32 |
+Subcommands may be used to scope the troubleshooting to a particular |
|
33 |
+component and are not limited to using config files; you can and should |
|
34 |
+use the same flags that are actually set on the command line for that |
|
35 |
+component to configure the diagnostic. |
|
36 |
+ |
|
37 |
+ $ %[1]s node --hostname='node.example.com' --kubeconfig=... |
|
38 |
+ |
|
39 |
+NOTE: This is an alpha version of diagnostics and will change significantly. |
|
40 |
+NOTE: Global flags (from the 'options' subcommand) are ignored here but |
|
41 |
+can be used with subcommands. |
|
42 |
+` |
|
43 |
+ |
|
44 |
+func NewCommandDiagnostics(name string, fullName string, out io.Writer) *cobra.Command { |
|
45 |
+ opts := options.NewAllDiagnosticsOptions(out) |
|
46 |
+ cmd := &cobra.Command{ |
|
47 |
+ Use: name, |
|
48 |
+ Short: "This utility helps you understand and troubleshoot OpenShift v3.", |
|
49 |
+ Long: fmt.Sprintf(longAllDescription, fullName), |
|
50 |
+ Run: func(c *cobra.Command, args []string) { |
|
51 |
+ opts.GlobalFlags = c.PersistentFlags() |
|
52 |
+ run.Diagnose(opts) |
|
53 |
+ }, |
|
54 |
+ } |
|
55 |
+ cmd.SetOutput(out) // for output re: usage / help |
|
56 |
+ opts.BindFlags(cmd.Flags(), options.NewAllDiagnosticsFlagInfos()) |
|
57 |
+ // Although we reuse DiagOptions across all commands, we do not want the flags buried |
|
58 |
+ // in the "global" flags, so we add them locally at each command. |
|
59 |
+ opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos()) |
|
60 |
+ |
|
61 |
+ /* |
|
62 |
+ This command needs the client factory built in the "client" subcommand. |
|
63 |
+ Generating the factory adds flags to the "client" cmd, and we do not want |
|
64 |
+ to add those flags to this command (the only client option here is a config |
|
65 |
+ file). So the factory object from client cmd is reused for this command. |
|
66 |
+ */ |
|
67 |
+ clientCmd, factory := NewClientCommand("client", name+" client", out) |
|
68 |
+ opts.ClientDiagOptions.Factory = factory |
|
69 |
+ |
|
70 |
+ cmd.AddCommand(clientCmd) |
|
71 |
+ cmd.AddCommand(NewMasterCommand("master", name+" master", out)) |
|
72 |
+ cmd.AddCommand(NewNodeCommand("node", name+" node", out)) |
|
73 |
+ cmd.AddCommand(NewOptionsCommand()) |
|
74 |
+ |
|
75 |
+ return cmd |
|
76 |
+} |
|
77 |
+ |
|
78 |
+const longClientDescription = ` |
|
79 |
+OpenShift Diagnostics |
|
80 |
+ |
|
81 |
+This command helps you understand and troubleshoot OpenShift as a user. It is |
|
82 |
+intended to be run from the same context as an OpenShift client |
|
83 |
+("openshift cli" or "osc") and with the same configuration options. |
|
84 |
+ |
|
85 |
+ $ %s |
|
86 |
+` |
|
87 |
+ |
|
88 |
+func NewClientCommand(name string, fullName string, out io.Writer) (*cobra.Command, *osclientcmd.Factory) { |
|
89 |
+ opts := options.NewClientDiagnosticsOptions(out, nil) |
|
90 |
+ cmd := &cobra.Command{ |
|
91 |
+ Use: name, |
|
92 |
+ Short: "Troubleshoot using the OpenShift v3 client.", |
|
93 |
+ Long: fmt.Sprintf(longClientDescription, fullName), |
|
94 |
+ Run: func(c *cobra.Command, args []string) { |
|
95 |
+ run.Diagnose(&options.AllDiagnosticsOptions{ |
|
96 |
+ ClientDiagOptions: opts, |
|
97 |
+ DiagOptions: opts.DiagOptions, |
|
98 |
+ GlobalFlags: c.PersistentFlags(), |
|
99 |
+ }) |
|
100 |
+ }, |
|
101 |
+ } |
|
102 |
+ cmd.SetOutput(out) // for output re: usage / help |
|
103 |
+ opts.MustCheck = true |
|
104 |
+ opts.Factory = osclientcmd.New(cmd.PersistentFlags()) // side effect: add standard persistent flags for openshift client |
|
105 |
+ opts.BindFlags(cmd.Flags(), options.NewClientDiagnosticsFlagInfos()) |
|
106 |
+ opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos()) |
|
107 |
+ |
|
108 |
+ cmd.AddCommand(NewOptionsCommand()) |
|
109 |
+ return cmd, opts.Factory |
|
110 |
+} |
|
111 |
+ |
|
112 |
+const longMasterDescription = ` |
|
113 |
+OpenShift Diagnostics |
|
114 |
+ |
|
115 |
+This command helps you understand and troubleshoot a running OpenShift |
|
116 |
+master. It is intended to be run from the same context as the master |
|
117 |
+(where "openshift start" or "openshift start master" is run, possibly from |
|
118 |
+systemd or inside a container) and with the same configuration options. |
|
119 |
+ |
|
120 |
+ $ %s |
|
121 |
+` |
|
122 |
+ |
|
123 |
+func NewMasterCommand(name string, fullName string, out io.Writer) *cobra.Command { |
|
124 |
+ opts := options.NewMasterDiagnosticsOptions(out, nil) |
|
125 |
+ cmd := &cobra.Command{ |
|
126 |
+ Use: name, |
|
127 |
+ Short: "Troubleshoot an OpenShift v3 master.", |
|
128 |
+ Long: fmt.Sprintf(longMasterDescription, fullName), |
|
129 |
+ Run: func(c *cobra.Command, args []string) { |
|
130 |
+ run.Diagnose(&options.AllDiagnosticsOptions{ |
|
131 |
+ MasterDiagOptions: opts, |
|
132 |
+ DiagOptions: opts.DiagOptions, |
|
133 |
+ GlobalFlags: c.PersistentFlags(), |
|
134 |
+ }) |
|
135 |
+ }, |
|
136 |
+ } |
|
137 |
+ cmd.SetOutput(out) // for output re: usage / help |
|
138 |
+ opts.MustCheck = true |
|
139 |
+ opts.MasterStartOptions = &start.MasterOptions{MasterArgs: start.MasterArgsAndFlags(cmd.Flags())} |
|
140 |
+ opts.BindFlags(cmd.Flags(), options.NewMasterDiagnosticsFlagInfos()) |
|
141 |
+ opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos()) |
|
142 |
+ |
|
143 |
+ cmd.AddCommand(NewOptionsCommand()) |
|
144 |
+ return cmd |
|
145 |
+} |
|
146 |
+ |
|
147 |
+const longNodeDescription = ` |
|
148 |
+OpenShift Diagnostics |
|
149 |
+ |
|
150 |
+This command helps you understand and troubleshoot a running OpenShift |
|
151 |
+node. It is intended to be run from the same context as the node |
|
152 |
+(where "openshift start" or "openshift start node" is run, possibly from |
|
153 |
+systemd or inside a container) and with the same configuration options. |
|
154 |
+ |
|
155 |
+ $ %s |
|
156 |
+` |
|
157 |
+ |
|
158 |
+func NewNodeCommand(name string, fullName string, out io.Writer) *cobra.Command { |
|
159 |
+ opts := options.NewNodeDiagnosticsOptions(out, nil) |
|
160 |
+ cmd := &cobra.Command{ |
|
161 |
+ Use: name, |
|
162 |
+ Short: "Troubleshoot an OpenShift v3 node.", |
|
163 |
+ Long: fmt.Sprintf(longNodeDescription, fullName), |
|
164 |
+ Run: func(c *cobra.Command, args []string) { |
|
165 |
+ run.Diagnose(&options.AllDiagnosticsOptions{ |
|
166 |
+ NodeDiagOptions: opts, |
|
167 |
+ DiagOptions: opts.DiagOptions, |
|
168 |
+ GlobalFlags: c.PersistentFlags(), |
|
169 |
+ }) |
|
170 |
+ }, |
|
171 |
+ } |
|
172 |
+ cmd.SetOutput(out) // for output re: usage / help |
|
173 |
+ opts.MustCheck = true |
|
174 |
+ opts.NodeStartOptions = &start.NodeOptions{NodeArgs: start.NodeArgsAndFlags(cmd.Flags())} |
|
175 |
+ opts.BindFlags(cmd.Flags(), options.NewNodeDiagnosticsFlagInfos()) |
|
176 |
+ opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos()) |
|
177 |
+ |
|
178 |
+ cmd.AddCommand(NewOptionsCommand()) |
|
179 |
+ return cmd |
|
180 |
+} |
|
181 |
+ |
|
182 |
+func NewOptionsCommand() *cobra.Command { |
|
183 |
+ cmd := &cobra.Command{ |
|
184 |
+ Use: "options", |
|
185 |
+ Run: func(cmd *cobra.Command, args []string) { |
|
186 |
+ cmd.Usage() |
|
187 |
+ }, |
|
188 |
+ } |
|
189 |
+ |
|
190 |
+ templates.UseOptionsTemplates(cmd) |
|
191 |
+ |
|
192 |
+ return cmd |
|
193 |
+} |
0 | 194 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,53 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/spf13/pflag" |
|
4 |
+ "io" |
|
5 |
+) |
|
6 |
+ |
|
7 |
// AllDiagnosticsOptions holds the user options for the openshift-diagnostics
// main command, aggregating the common options plus the per-component
// (client/master/node) option sets and their config-file paths.
type AllDiagnosticsOptions struct {
	DiagOptions       *DiagnosticsOptions       // options shared by all diagnostics commands
	ClientDiagOptions *ClientDiagnosticsOptions // client-specific options
	MasterDiagOptions *MasterDiagnosticsOptions // master-specific options
	NodeDiagOptions   *NodeDiagnosticsOptions   // node-specific options
	ClientConfigPath  string                    // explicit client config file path, if given
	MasterConfigPath  string                    // explicit master config file path, if given
	NodeConfigPath    string                    // explicit node config file path, if given

	// GlobalFlags is retained because there are cases where discovery has to
	// look up flags created indirectly.
	GlobalFlags *pflag.FlagSet
}
|
20 |
+ |
|
21 |
// AllDiagnosticsFlagInfos are the definitions used to bind
// AllDiagnosticsOptions to actual flags on a command.
type AllDiagnosticsFlagInfos struct {
	ClientConfigPath FlagInfo // flag for AllDiagnosticsOptions.ClientConfigPath
	MasterConfigPath FlagInfo // flag for AllDiagnosticsOptions.MasterConfigPath
	NodeConfigPath   FlagInfo // flag for AllDiagnosticsOptions.NodeConfigPath
}
|
27 |
+ |
|
28 |
+func NewAllDiagnosticsOptions(out io.Writer) *AllDiagnosticsOptions { |
|
29 |
+ common := NewDiagnosticsOptions(out) |
|
30 |
+ |
|
31 |
+ return &AllDiagnosticsOptions{ |
|
32 |
+ DiagOptions: common, |
|
33 |
+ ClientDiagOptions: NewClientDiagnosticsOptions(nil, common), |
|
34 |
+ MasterDiagOptions: NewMasterDiagnosticsOptions(nil, common), |
|
35 |
+ NodeDiagOptions: NewNodeDiagnosticsOptions(nil, common), |
|
36 |
+ } |
|
37 |
+} |
|
38 |
+ |
|
39 |
+// default overrideable flag specifications to be bound to options. |
|
40 |
+func NewAllDiagnosticsFlagInfos() *AllDiagnosticsFlagInfos { |
|
41 |
+ return &AllDiagnosticsFlagInfos{ |
|
42 |
+ ClientConfigPath: FlagInfo{FlagAllClientConfigName, "", "", "Path to the client config file."}, |
|
43 |
+ MasterConfigPath: FlagInfo{FlagAllMasterConfigName, "", "", "Path to the master config file."}, |
|
44 |
+ NodeConfigPath: FlagInfo{FlagAllNodeConfigName, "", "", "Path to the node config file."}, |
|
45 |
+ } |
|
46 |
+} |
|
47 |
+ |
|
48 |
+func (o *AllDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *AllDiagnosticsFlagInfos) { |
|
49 |
+ flagInfos.ClientConfigPath.BindStringFlag(cmdFlags, &o.ClientConfigPath) |
|
50 |
+ flagInfos.MasterConfigPath.BindStringFlag(cmdFlags, &o.MasterConfigPath) |
|
51 |
+ flagInfos.NodeConfigPath.BindStringFlag(cmdFlags, &o.NodeConfigPath) |
|
52 |
+} |
0 | 53 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,46 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
4 |
+ "github.com/spf13/pflag" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// ClientDiagnosticsOptions holds the user options for the
// openshift-diagnostics client command.
type ClientDiagnosticsOptions struct {
	DiagOptions *DiagnosticsOptions  // options shared by all diagnostics commands
	Factory     *osclientcmd.Factory // client factory; set by the command that builds it
	MustCheck   bool                 // set for "diagnostics client" which requires diagnosing the client even if there is no config file
	// Turns out we don't need to add any flags... YET
}
|
15 |
+ |
|
16 |
// ClientDiagnosticsFlagInfos are the definitions used to bind
// ClientDiagnosticsOptions to actual flags on a command.
// The client command defines no flags of its own yet.
type ClientDiagnosticsFlagInfos struct {
	// don't need yet...
	//Something FlagInfo
}
|
21 |
+ |
|
22 |
+// supply output writer or pre-created DiagnosticsOptions |
|
23 |
+func NewClientDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *ClientDiagnosticsOptions { |
|
24 |
+ if opts != nil { |
|
25 |
+ return &ClientDiagnosticsOptions{ |
|
26 |
+ DiagOptions: opts, |
|
27 |
+ } |
|
28 |
+ } else if out != nil { |
|
29 |
+ return &ClientDiagnosticsOptions{ |
|
30 |
+ DiagOptions: NewDiagnosticsOptions(out), |
|
31 |
+ } |
|
32 |
+ } |
|
33 |
+ return nil |
|
34 |
+} |
|
35 |
+ |
|
36 |
+// default overrideable flag specifications to be bound to options. |
|
37 |
+func NewClientDiagnosticsFlagInfos() *ClientDiagnosticsFlagInfos { |
|
38 |
+ return &ClientDiagnosticsFlagInfos{ |
|
39 |
+ //NodeConfigPath: FlagInfo{"node-config", "", "", "Path to the node config file."}, |
|
40 |
+ } |
|
41 |
+} |
|
42 |
+ |
|
43 |
// BindFlags is a placeholder kept for symmetry with the master/node option
// types; the client command has no flags of its own to bind yet.
func (o *ClientDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *ClientDiagnosticsFlagInfos) {
	//flagInfos.Something.BindStringFlag(cmdFlags, &o.Something)
}
0 | 46 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,46 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
|
4 |
+ cmdutil "github.com/openshift/origin/pkg/cmd/util" |
|
5 |
+ "github.com/spf13/pflag" |
|
6 |
+ "io" |
|
7 |
+) |
|
8 |
+ |
|
9 |
// DiagnosticsOptions are the common options that all of the diagnostics
// commands bind.
type DiagnosticsOptions struct {
	Diagnostics *util.StringList // named diagnostics to run
	DiagLevel   int              // show output of this priority or higher
	DiagFormat  string           // format of output - text/json/yaml

	Output cmdutil.Output // used for discovery and diagnostic output
}
|
17 |
+ |
|
18 |
+func NewDiagnosticsOptions(out io.Writer) *DiagnosticsOptions { |
|
19 |
+ return &DiagnosticsOptions{ |
|
20 |
+ Diagnostics: &util.StringList{}, // have to instantiate in order to bind flag |
|
21 |
+ Output: cmdutil.Output{out}, |
|
22 |
+ } |
|
23 |
+} |
|
24 |
+ |
|
25 |
// DiagnosticsFlagInfos are the definitions used to bind DiagnosticsOptions
// to actual flags on a command.
type DiagnosticsFlagInfos struct {
	Diagnostics FlagInfo // flag selecting which named diagnostics to run
	DiagLevel   FlagInfo // flag controlling output priority threshold
	DiagFormat  FlagInfo // flag selecting the output format
}
|
31 |
+ |
|
32 |
+// default overrideable flag specifications to be bound to options. |
|
33 |
+func NewDiagnosticsFlagInfos() *DiagnosticsFlagInfos { |
|
34 |
+ return &DiagnosticsFlagInfos{ |
|
35 |
+ Diagnostics: FlagInfo{FlagDiagnosticsName, "d", "", `comma-separated list of diagnostic names to run, e.g. "systemd.AnalyzeLogs"`}, |
|
36 |
+ DiagLevel: FlagInfo{FlagLevelName, "l", "3", "Level of diagnostic output: 0: Error, 1: Warn, 2: Notice, 3: Info, 4: Debug"}, |
|
37 |
+ DiagFormat: FlagInfo{FlagFormatName, "o", "text", "Output format: text|json|yaml"}, |
|
38 |
+ } |
|
39 |
+} |
|
40 |
+ |
|
41 |
+func (o *DiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *DiagnosticsFlagInfos) { |
|
42 |
+ flagInfos.Diagnostics.BindListFlag(cmdFlags, o.Diagnostics) |
|
43 |
+ flagInfos.DiagLevel.BindIntFlag(cmdFlags, &o.DiagLevel) |
|
44 |
+ flagInfos.DiagFormat.BindStringFlag(cmdFlags, &o.DiagFormat) |
|
45 |
+} |
0 | 46 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,57 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ kclientcmd "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd" |
|
4 |
+ kutil "github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
|
5 |
+ "github.com/spf13/pflag" |
|
6 |
+ "strconv" |
|
7 |
+) |
|
8 |
+ |
|
9 |
// FlagInfo is a local alias of the kubernetes clientcmd FlagInfo, reused so
// that flag-binding helper methods can be defined on it here.
type FlagInfo kclientcmd.FlagInfo

// FlagInfos serve as a customizable intermediary between the command flags and
// the options object they feed into. This enables reuse of the flags and options
// with tweaked definitions in different contexts if necessary.
|
14 |
+ |
|
15 |
+func (i FlagInfo) BindStringFlag(flags *pflag.FlagSet, target *string) { |
|
16 |
+ // assume flags with no longname are not desired |
|
17 |
+ if len(i.LongName) > 0 { |
|
18 |
+ flags.StringVarP(target, i.LongName, i.ShortName, i.Default, i.Description) |
|
19 |
+ } |
|
20 |
+} |
|
21 |
+ |
|
22 |
+func (i FlagInfo) BindIntFlag(flags *pflag.FlagSet, target *int) { |
|
23 |
+ // assume flags with no longname are not desired |
|
24 |
+ if len(i.LongName) > 0 { |
|
25 |
+ // try to parse Default as an int. If it fails, assume 0 |
|
26 |
+ intVal, _ := strconv.ParseInt(i.Default, 10, 0) |
|
27 |
+ flags.IntVarP(target, i.LongName, i.ShortName, int(intVal), i.Description) |
|
28 |
+ } |
|
29 |
+} |
|
30 |
+ |
|
31 |
+func (i FlagInfo) BindBoolFlag(flags *pflag.FlagSet, target *bool) { |
|
32 |
+ // assume flags with no longname are not desired |
|
33 |
+ if len(i.LongName) > 0 { |
|
34 |
+ // try to parse Default as a bool. If it fails, assume false |
|
35 |
+ boolVal, _ := strconv.ParseBool(i.Default) |
|
36 |
+ flags.BoolVarP(target, i.LongName, i.ShortName, boolVal, i.Description) |
|
37 |
+ } |
|
38 |
+} |
|
39 |
+ |
|
40 |
+func (i FlagInfo) BindListFlag(flags *pflag.FlagSet, target *kutil.StringList) { |
|
41 |
+ // assume flags with no longname are not desired |
|
42 |
+ if len(i.LongName) > 0 { |
|
43 |
+ flags.VarP(target, i.LongName, i.ShortName, i.Description) |
|
44 |
+ } |
|
45 |
+} |
|
46 |
+ |
|
47 |
// Long names for the flags used by the diagnostics commands.
const (
	FlagAllClientConfigName = "client-config"
	FlagAllMasterConfigName = "master-config"
	FlagAllNodeConfigName   = "node-config"
	FlagDiagnosticsName     = "diagnostics"
	FlagLevelName           = "diaglevel"
	FlagFormatName          = "output"
	// The master and node subcommands each use plain "config" for their own
	// config-file flag (they never share a flag set, so the duplicate name
	// does not collide).
	FlagMasterConfigName = "config"
	FlagNodeConfigName   = "config"
)
0 | 57 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,45 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
4 |
+ "github.com/spf13/pflag" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// MasterDiagnosticsOptions holds the user options for the
// openshift-diagnostics master command.
type MasterDiagnosticsOptions struct {
	DiagOptions *DiagnosticsOptions // options shared by all diagnostics commands
	MustCheck   bool                // set for "diagnostics master" which requires diagnosing master even if there is no config file
	// MasterStartOptions reuses the master options from "openshift start master";
	// it must be populated by the command before BindFlags is called.
	MasterStartOptions *start.MasterOptions
}
|
15 |
+ |
|
16 |
// MasterDiagnosticsFlagInfos are the definitions used to bind
// MasterDiagnosticsOptions to actual flags on a command.
type MasterDiagnosticsFlagInfos struct {
	ConfigFile FlagInfo // flag for the master config-file path
}
|
20 |
+ |
|
21 |
+// supply output writer or pre-created DiagnosticsOptions |
|
22 |
+func NewMasterDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *MasterDiagnosticsOptions { |
|
23 |
+ if opts != nil { |
|
24 |
+ return &MasterDiagnosticsOptions{ |
|
25 |
+ DiagOptions: opts, |
|
26 |
+ } |
|
27 |
+ } else if out != nil { |
|
28 |
+ return &MasterDiagnosticsOptions{ |
|
29 |
+ DiagOptions: NewDiagnosticsOptions(out), |
|
30 |
+ } |
|
31 |
+ } |
|
32 |
+ return nil |
|
33 |
+} |
|
34 |
+ |
|
35 |
+// default overrideable flag specifications to be bound to options. |
|
36 |
+func NewMasterDiagnosticsFlagInfos() *MasterDiagnosticsFlagInfos { |
|
37 |
+ return &MasterDiagnosticsFlagInfos{ |
|
38 |
+ ConfigFile: FlagInfo{FlagMasterConfigName, "", "", "Location of the master configuration file to run from. When running from a configuration file, all other command-line arguments are ignored."}, |
|
39 |
+ } |
|
40 |
+} |
|
41 |
+ |
|
42 |
// BindFlags binds the master config-file flag to cmdFlags.
// NOTE(review): o.MasterStartOptions is dereferenced here and is not set by
// NewMasterDiagnosticsOptions; callers (e.g. NewMasterCommand) must assign
// it first or this will panic on a nil pointer.
func (o *MasterDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *MasterDiagnosticsFlagInfos) {
	flagInfos.ConfigFile.BindStringFlag(cmdFlags, &o.MasterStartOptions.ConfigFile)
}
0 | 45 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,45 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
4 |
+ "github.com/spf13/pflag" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// NodeDiagnosticsOptions holds the user options for the
// openshift-diagnostics node command.
type NodeDiagnosticsOptions struct {
	DiagOptions *DiagnosticsOptions // options shared by all diagnostics commands
	MustCheck   bool                // set for "diagnostics node" which requires diagnosing node even if there is no config file
	// NodeStartOptions reuses the node options from "openshift start node";
	// it must be populated by the command before BindFlags is called.
	NodeStartOptions *start.NodeOptions
}
|
15 |
+ |
|
16 |
// NodeDiagnosticsFlagInfos are the definitions used to bind
// NodeDiagnosticsOptions to actual flags on a command.
type NodeDiagnosticsFlagInfos struct {
	ConfigFile FlagInfo // flag for the node config-file path
}
|
20 |
+ |
|
21 |
+// supply output writer or pre-created DiagnosticsOptions |
|
22 |
+func NewNodeDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *NodeDiagnosticsOptions { |
|
23 |
+ if opts != nil { |
|
24 |
+ return &NodeDiagnosticsOptions{ |
|
25 |
+ DiagOptions: opts, |
|
26 |
+ } |
|
27 |
+ } else if out != nil { |
|
28 |
+ return &NodeDiagnosticsOptions{ |
|
29 |
+ DiagOptions: NewDiagnosticsOptions(out), |
|
30 |
+ } |
|
31 |
+ } |
|
32 |
+ return nil |
|
33 |
+} |
|
34 |
+ |
|
35 |
+// default overrideable flag specifications to be bound to options. |
|
36 |
+func NewNodeDiagnosticsFlagInfos() *NodeDiagnosticsFlagInfos { |
|
37 |
+ return &NodeDiagnosticsFlagInfos{ |
|
38 |
+ ConfigFile: FlagInfo{FlagNodeConfigName, "", "", "Location of the node configuration file to run from. When running from a configuration file, all other command-line arguments are ignored."}, |
|
39 |
+ } |
|
40 |
+} |
|
41 |
+ |
|
42 |
+func (o *NodeDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *NodeDiagnosticsFlagInfos) { |
|
43 |
+ flagInfos.ConfigFile.BindStringFlag(cmdFlags, &o.NodeStartOptions.ConfigFile) |
|
44 |
+} |
... | ... |
@@ -12,6 +12,7 @@ import ( |
12 | 12 |
"github.com/openshift/origin/pkg/cmd/cli" |
13 | 13 |
"github.com/openshift/origin/pkg/cmd/cli/cmd" |
14 | 14 |
"github.com/openshift/origin/pkg/cmd/experimental/buildchain" |
15 |
+ diagnostics "github.com/openshift/origin/pkg/cmd/experimental/diagnostics" |
|
15 | 16 |
exipfailover "github.com/openshift/origin/pkg/cmd/experimental/ipfailover" |
16 | 17 |
"github.com/openshift/origin/pkg/cmd/experimental/tokens" |
17 | 18 |
"github.com/openshift/origin/pkg/cmd/flagtypes" |
... | ... |
@@ -52,6 +53,8 @@ func CommandFor(basename string) *cobra.Command { |
52 | 52 |
cmd = irouter.NewCommandRouter(basename) |
53 | 53 |
case "openshift-deploy": |
54 | 54 |
cmd = deployer.NewCommandDeployer(basename) |
55 |
+ case "openshift-diagnostics": |
|
56 |
+ cmd = diagnostics.NewCommandDiagnostics(basename, basename, os.Stdout) |
|
55 | 57 |
case "openshift-sti-build": |
56 | 58 |
cmd = builder.NewCommandSTIBuilder(basename) |
57 | 59 |
case "openshift-docker-build": |
... | ... |
@@ -155,6 +158,7 @@ func newExperimentalCommand(name, fullName string) *cobra.Command { |
155 | 155 |
experimental.AddCommand(tokens.NewCmdTokens(tokens.TokenRecommendedCommandName, fullName+" "+tokens.TokenRecommendedCommandName, f, out)) |
156 | 156 |
experimental.AddCommand(exipfailover.NewCmdIPFailoverConfig(f, fullName, "ipfailover", out)) |
157 | 157 |
experimental.AddCommand(buildchain.NewCmdBuildChain(name, fullName+" "+buildchain.BuildChainRecommendedCommandName, f, out)) |
158 |
+ experimental.AddCommand(diagnostics.NewCommandDiagnostics("diagnostics", fullName+" diagnostics", out)) |
|
158 | 159 |
experimental.AddCommand(cmd.NewCmdOptions(out)) |
159 | 160 |
return experimental |
160 | 161 |
} |
161 | 162 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,84 @@ |
0 |
+OpenShift v3 Diagnostics |
|
1 |
+======================== |
|
2 |
+ |
|
3 |
+This is a tool to help administrators and users resolve common problems |
|
4 |
+that occur with OpenShift v3 deployments. It is currently (May 2015) |
|
5 |
+under continuous development as the OpenShift Origin project progresses. |
|
6 |
+ |
|
7 |
+The goals of the diagnostics tool are summarized in this [Trello |
|
8 |
+card](https://trello.com/c/LdUogKuN). Diagnostics are included as an |
|
9 |
+`openshift` binary sub-command that analyzes OpenShift as it finds it, |
|
10 |
+whether from the perspective of an OpenShift client or on an OpenShift |
|
11 |
+host. |
|
12 |
+ |
|
13 |
+Expected environment |
|
14 |
+==================== |
|
15 |
+ |
|
16 |
+OpenShift can be deployed in many ways: built from source, included |
|
17 |
+in a VM image, in a Docker image, or as enterprise RPMs. Each of these |
|
18 |
+would imply different configuration and environment. In order to keep |
|
19 |
+assumptions about environment to a minimum, the diagnostics have been |
|
20 |
+added to the `openshift` binary itself so that wherever there is an |
|
21 |
+OpenShift server or client, the diagnostics can run in the exact same |
|
22 |
+environment. |
|
23 |
+ |
|
24 |
+`openshift ex diagnostics` subcommands for master, node, and client |
|
25 |
+provide flags to mimic the configurations for those respective components, |
|
26 |
+so that running diagnostics against a component should be as simple as |
|
27 |
+supplying the same flags that would invoke the component. So, |
|
28 |
+for example, if a master is started with: |
|
29 |
+ |
|
30 |
+ openshift start master --public-hostname=... |
|
31 |
+ |
|
32 |
+Then diagnostics against that master would simply be run as: |
|
33 |
+ |
|
34 |
+ openshift ex diagnostics master --public-hostname=... |
|
35 |
+ |
|
36 |
+In this way it should be possible to invoke diagnostics against any |
|
37 |
+given environment. |
|
38 |
+ |
|
39 |
+Host environment |
|
40 |
+================ |
|
41 |
+ |
|
42 |
+However, master/node diagnostics will be most useful in a specific |
|
43 |
+target environment, which is a deployment using Enterprise RPMs and |
|
44 |
+ansible deployment logic. This provides two major benefits: |
|
45 |
+ |
|
46 |
+* master/node configuration is based on a configuration file in a standard location |
|
47 |
+* all components log to journald |
|
48 |
+ |
|
49 |
+Having configuration files in standard locations means you will generally |
|
50 |
+not even need to specify where to find them. Running: |
|
51 |
+ |
|
52 |
+ openshift ex diagnostics |
|
53 |
+ |
|
54 |
+by itself will look for master and node configs (in addition to client |
|
55 |
+config file) in the standard locations and use them if found; so this |
|
56 |
+should make the Enterprise use case as simple as possible. It's also |
|
57 |
+very easy to use configuration files when they are not in the expected |
|
58 |
+Enterprise locations: |
|
59 |
+ |
|
60 |
+ openshift ex diagnostics --master-config=... --node-config=... |
|
61 |
+ |
|
62 |
+Having logs in journald is necessary for the current log analysis |
|
63 |
+logic. Other usage may have logs going into files, output to stdout, |
|
64 |
+combined node/master... it may not be too hard to extend analysis to |
|
65 |
+other log sources but the priority has been to look at journald logs |
|
66 |
+as created by components in Enterprise deployments (including docker, |
|
67 |
+openvswitch, etc.). |
|
68 |
+ |
|
69 |
+Client environment |
|
70 |
+================== |
|
71 |
+ |
|
72 |
+The user may only have access as an ordinary user, as a cluster-admin |
|
73 |
+user, or may have admin on a host where OpenShift master or node services |
|
74 |
+are operating. The diagnostics will attempt to use as much access as |
|
75 |
+the user has available. |
|
76 |
+ |
|
77 |
+A client with ordinary access should be able to diagnose its connection |
|
78 |
+to the master and look for problems in builds and deployments. |
|
79 |
+ |
|
80 |
+A client with cluster-admin access should be able to diagnose the same |
|
81 |
+things for every project in the deployment, as well as infrastructure |
|
82 |
+status. |
|
83 |
+ |
0 | 84 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,293 @@ |
0 |
+package client |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
|
5 |
+ kerrs "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" |
|
6 |
+ client "github.com/GoogleCloudPlatform/kubernetes/pkg/client" |
|
7 |
+ clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
8 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" |
|
9 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" |
|
10 |
+ osclient "github.com/openshift/origin/pkg/client" |
|
11 |
+ "github.com/openshift/origin/pkg/diagnostics/discovery" |
|
12 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
13 |
+ "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
14 |
+ osapi "github.com/openshift/origin/pkg/image/api" |
|
15 |
+ "reflect" |
|
16 |
+ "strings" |
|
17 |
+) |
|
18 |
+ |
|
19 |
+var Diagnostics = map[string]diagnostic.Diagnostic{ |
|
20 |
+ "NodeDefinitions": { |
|
21 |
+ Description: "Check node records on master", |
|
22 |
+ Condition: func(env *discovery.Environment) (skip bool, reason string) { |
|
23 |
+ if env.ClusterAdminFactory == nil { |
|
24 |
+ return true, "Client does not have cluster-admin access and cannot see node records" |
|
25 |
+ } |
|
26 |
+ return false, "" |
|
27 |
+ }, |
|
28 |
+ Run: func(env *discovery.Environment) { |
|
29 |
+ var err error |
|
30 |
+ var nodes *kapi.NodeList |
|
31 |
+ if _, kclient, err := env.ClusterAdminFactory.Clients(); err == nil { |
|
32 |
+ nodes, err = kclient.Nodes().List(labels.LabelSelector{}, fields.Everything()) |
|
33 |
+ } |
|
34 |
+ if err != nil { |
|
35 |
+ env.Log.Errorf("clGetNodesFailed", ` |
|
36 |
+Client error while retrieving node records. Client retrieved records |
|
37 |
+during discovery, so this is likely to be a transient error. Try running |
|
38 |
+diagnostics again. If this message persists, there may be a permissions |
|
39 |
+problem with getting node records. The error was: |
|
40 |
+ |
|
41 |
+(%T) %[1]v`, err) |
|
42 |
+ return |
|
43 |
+ } |
|
44 |
+ for _, node := range nodes.Items { |
|
45 |
+ //pretty.Println("Node record:", node) |
|
46 |
+ var ready *kapi.NodeCondition |
|
47 |
+ for i, condition := range node.Status.Conditions { |
|
48 |
+ switch condition.Type { |
|
49 |
+ // currently only one... used to be more, may be again |
|
50 |
+ case kapi.NodeReady: |
|
51 |
+ ready = &node.Status.Conditions[i] |
|
52 |
+ } |
|
53 |
+ } |
|
54 |
+ //pretty.Println("Node conditions for "+node.Name, ready, schedulable) |
|
55 |
+ if ready == nil || ready.Status != kapi.ConditionTrue { |
|
56 |
+ msg := log.Msg{ |
|
57 |
+ "node": node.Name, |
|
58 |
+ "tmpl": ` |
|
59 |
+Node {{.node}} is defined but is not marked as ready. |
|
60 |
+Ready status is {{.status}} because "{{.reason}}" |
|
61 |
+If the node is not intentionally disabled, check that the master can |
|
62 |
+reach the node hostname for a health check and the node is checking in |
|
63 |
+to the master with the same hostname. |
|
64 |
+ |
|
65 |
+While in this state, pods should not be scheduled to deploy on the node, |
|
66 |
+and any existing scheduled pods will be considered failed and removed. |
|
67 |
+ `, |
|
68 |
+ } |
|
69 |
+ if ready == nil { |
|
70 |
+ msg["status"] = "None" |
|
71 |
+ msg["reason"] = "There is no readiness record." |
|
72 |
+ } else { |
|
73 |
+ msg["status"] = ready.Status |
|
74 |
+ msg["reason"] = ready.Reason |
|
75 |
+ } |
|
76 |
+ env.Log.Warnm("clNodeBroken", msg) |
|
77 |
+ } |
|
78 |
+ } |
|
79 |
+ }, |
|
80 |
+ }, |
|
81 |
+ |
|
82 |
+ "ConfigContexts": { |
|
83 |
+ Description: "Test that client config contexts have no undefined references", |
|
84 |
+ Condition: func(env *discovery.Environment) (skip bool, reason string) { |
|
85 |
+ if env.ClientConfigRaw == nil { |
|
86 |
+ return true, "There is no client config file" |
|
87 |
+ } |
|
88 |
+ return false, "" |
|
89 |
+ }, |
|
90 |
+ Run: func(env *discovery.Environment) { |
|
91 |
+ cc := env.ClientConfigRaw |
|
92 |
+ current := cc.CurrentContext |
|
93 |
+ ccSuccess := false |
|
94 |
+ var ccResult log.Msg //nil |
|
95 |
+ for context := range cc.Contexts { |
|
96 |
+ result, success := TestContext(context, cc) |
|
97 |
+ msg := log.Msg{"tmpl": "For client config context '{{.context}}':{{.result}}", "context": context, "result": result} |
|
98 |
+ if context == current { |
|
99 |
+ ccResult, ccSuccess = msg, success |
|
100 |
+ } else if success { |
|
101 |
+ env.Log.Infom("clientCfgSuccess", msg) |
|
102 |
+ } else { |
|
103 |
+ env.Log.Warnm("clientCfgWarn", msg) |
|
104 |
+ } |
|
105 |
+ } |
|
106 |
+ if _, exists := cc.Contexts[current]; exists { |
|
107 |
+ ccResult["tmpl"] = ` |
|
108 |
+The current context from client config is '{{.context}}' |
|
109 |
+This will be used by default to contact your OpenShift server. |
|
110 |
+` + ccResult["tmpl"].(string) |
|
111 |
+ if ccSuccess { |
|
112 |
+ env.Log.Infom("currentccSuccess", ccResult) |
|
113 |
+ } else { |
|
114 |
+ env.Log.Errorm("currentccWarn", ccResult) |
|
115 |
+ } |
|
116 |
+ } else { // context does not exist |
|
117 |
+ env.Log.Errorm("cConUndef", log.Msg{"tmpl": ` |
|
118 |
+Your client config specifies a current context of '{{.context}}' |
|
119 |
+which is not defined; it is likely that a mistake was introduced while |
|
120 |
+manually editing your config. If this is a simple typo, you may be |
|
121 |
+able to fix it manually. |
|
122 |
+The OpenShift master creates a fresh config when it is started; it may be |
|
123 |
+useful to use this as a base if available.`, "context": current}) |
|
124 |
+ } |
|
125 |
+ }, |
|
126 |
+ }, |
|
127 |
+ |
|
128 |
+ "ClusterRegistry": { |
|
129 |
+ Description: "Check there is a working Docker registry", |
|
130 |
+ Condition: func(env *discovery.Environment) (skip bool, reason string) { |
|
131 |
+ if env.ClusterAdminFactory == nil { |
|
132 |
+ return true, "Client does not have cluster-admin access and cannot see registry objects" |
|
133 |
+ } |
|
134 |
+ return false, "" |
|
135 |
+ }, |
|
136 |
+ Run: func(env *discovery.Environment) { |
|
137 |
+ osClient, kclient, err := env.ClusterAdminFactory.Clients() |
|
138 |
+ if err != nil { |
|
139 |
+ env.Log.Errorf("clGetClientFailed", "Constructing clients failed. This should never happen. Error: (%T) %[1]v", err) |
|
140 |
+ return |
|
141 |
+ } |
|
142 |
+ // retrieve the service if it exists |
|
143 |
+ if service := getRegistryService(kclient, env.Log); service != nil { |
|
144 |
+ // Check that it actually has a pod selected that's running |
|
145 |
+ if pod := getRegistryPod(kclient, service, env.Log); pod != nil { |
|
146 |
+ // Check that an endpoint exists on the service |
|
147 |
+ if endPoint := getRegistryEndpoint(kclient, env.Log); endPoint != nil { |
|
148 |
+ // TODO: Check that endpoints on the service match the pod (hasn't been a problem yet though) |
|
149 |
+ // TODO: Check the logs for that pod for common issues (credentials, DNS resolution failure) |
|
150 |
+ // attempt to create an imagestream and see if it gets the same registry service IP from the service cache |
|
151 |
+ testRegistryImageStream(osClient, service, env.Log) |
|
152 |
+ } |
|
153 |
+ } |
|
154 |
+ } |
|
155 |
+ |
|
156 |
+ }, |
|
157 |
+ }, |
|
158 |
+} |
|
159 |
+ |
|
160 |
+func TestContext(contextName string, config *clientcmdapi.Config) (result string, success bool) { |
|
161 |
+ context, exists := config.Contexts[contextName] |
|
162 |
+ if !exists { |
|
163 |
+ return "client config context '" + contextName + "' is not defined.", false |
|
164 |
+ } |
|
165 |
+ clusterName := context.Cluster |
|
166 |
+ cluster, exists := config.Clusters[clusterName] |
|
167 |
+ if !exists { |
|
168 |
+ return fmt.Sprintf("client config context '%s' has a cluster '%s' which is not defined.", contextName, clusterName), false |
|
169 |
+ } |
|
170 |
+ authName := context.AuthInfo |
|
171 |
+ if _, exists := config.AuthInfos[authName]; !exists { |
|
172 |
+ return fmt.Sprintf("client config context '%s' has a user identity '%s' which is not defined.", contextName, authName), false |
|
173 |
+ } |
|
174 |
+ project := context.Namespace |
|
175 |
+ if project == "" { |
|
176 |
+ project = kapi.NamespaceDefault // OpenShift/k8s fills this in if missing |
|
177 |
+ } |
|
178 |
+ // TODO: actually send a request to see if can connect |
|
179 |
+ return fmt.Sprintf(` |
|
180 |
+The server URL is '%s' |
|
181 |
+The user authentication is '%s' |
|
182 |
+The current project is '%s'`, cluster.Server, authName, project), true |
|
183 |
+} |
|
184 |
+ |
|
185 |
// getRegistryService returns the "docker-registry" service from the default
// namespace, or nil (after logging an appropriate message) when it is absent
// or could not be retrieved.
func getRegistryService(kclient *client.Client, logger *log.Logger) *kapi.Service {
	service, err := kclient.Services("default").Get("docker-registry")
	// NOTE(review): comparing the dynamic type to *kerrs.StatusError matches
	// ANY API status error (e.g. Forbidden), not just "not found" — confirm
	// whether a not-found-specific check was intended here.
	if err != nil && reflect.TypeOf(err) == reflect.TypeOf(&kerrs.StatusError{}) {
		logger.Warnf("clGetRegFailed", `
There is no "docker-registry" service. This is not strictly required
to use OpenShift, however it is required for builds and its absence
probably indicates an incomplete installation of OpenShift.

Please use the 'osadm registry' command to create a registry.
`)
		return nil
	} else if err != nil {
		// Any other (non-status) error is unexpected and likely transient.
		logger.Errorf("clGetRegFailed", `
Client error while retrieving registry service. Client retrieved records
during discovery, so this is likely to be a transient error. Try running
diagnostics again. If this message persists, there may be a permissions
problem with getting records. The error was:

(%T) %[1]v`, err)
		return nil
	}
	logger.Debugf("clRegFound", "Found docker-registry service with ports %v", service.Spec.Ports)
	return service
}
|
209 |
+ |
|
210 |
+func getRegistryPod(kclient *client.Client, service *kapi.Service, logger *log.Logger) *kapi.Pod { |
|
211 |
+ pods, err := kclient.Pods("default").List(labels.SelectorFromSet(service.Spec.Selector), fields.Everything()) |
|
212 |
+ if err != nil { |
|
213 |
+ logger.Errorf("clRegListPods", "Finding pods for 'docker-registry' service failed. This should never happen. Error: (%T) %[1]v", err) |
|
214 |
+ return nil |
|
215 |
+ } else if len(pods.Items) < 1 { |
|
216 |
+ logger.Error("clRegNoPods", ` |
|
217 |
+The "docker-registry" service exists but has no associated pods, so it |
|
218 |
+is not available. Builds and deployments that use the registry will fail.`) |
|
219 |
+ return nil |
|
220 |
+ } else if len(pods.Items) > 1 { |
|
221 |
+ logger.Error("clRegNoPods", ` |
|
222 |
+The "docker-registry" service has multiple associated pods. Load-balanced |
|
223 |
+registries are not yet available, so these are likely to have incomplete |
|
224 |
+stores of images. Builds and deployments that use the registry will |
|
225 |
+fail sporadically.`) |
|
226 |
+ return nil |
|
227 |
+ } |
|
228 |
+ pod := &pods.Items[0] |
|
229 |
+ if pod.Status.Phase != kapi.PodRunning { |
|
230 |
+ logger.Errorf("clRegPodDown", ` |
|
231 |
+The "%s" pod for the "docker-registry" service is not running. |
|
232 |
+This may be transient, a scheduling error, or something else. |
|
233 |
+Builds and deployments that require the registry will fail.`, pod.ObjectMeta.Name) |
|
234 |
+ return nil |
|
235 |
+ } |
|
236 |
+ logger.Debugf("clRegPodFound", "Found docker-registry pod with name %s", pod.ObjectMeta.Name) |
|
237 |
+ return pod |
|
238 |
+} |
|
239 |
+ |
|
240 |
+func getRegistryEndpoint(kclient *client.Client, logger *log.Logger) *kapi.Endpoints { |
|
241 |
+ endPoint, err := kclient.Endpoints("default").Get("docker-registry") |
|
242 |
+ if err != nil { |
|
243 |
+ logger.Errorf("clRegGetEP", "Finding endpoints for 'docker-registry' service failed. This should never happen. Error: (%T) %[1]v", err) |
|
244 |
+ return nil |
|
245 |
+ } else if len(endPoint.Subsets) != 1 || len(endPoint.Subsets[0].Addresses) != 1 { |
|
246 |
+ logger.Warn("clRegNoEP", ` |
|
247 |
+The "docker-registry" service exists with one associated pod, but the |
|
248 |
+number of endpoints in the "docker-registry" endpoint object does not |
|
249 |
+match. This mismatch probably indicates a bug in OpenShift and it is |
|
250 |
+likely that builds and deployments that require the registry will fail.`) |
|
251 |
+ return nil |
|
252 |
+ } |
|
253 |
+ logger.Debugf("clRegPodFound", "Found docker-registry endpoint object") |
|
254 |
+ return endPoint |
|
255 |
+} |
|
256 |
+ |
|
257 |
+func testRegistryImageStream(client *osclient.Client, service *kapi.Service, logger *log.Logger) { |
|
258 |
+ imgStream, err := client.ImageStreams("default").Create(&osapi.ImageStream{ObjectMeta: kapi.ObjectMeta{GenerateName: "diagnostic-test-"}}) |
|
259 |
+ if err != nil { |
|
260 |
+ logger.Errorf("clRegISCFail", "Creating test ImageStream failed. Error: (%T) %[1]v", err) |
|
261 |
+ return |
|
262 |
+ } |
|
263 |
+ defer client.ImageStreams("default").Delete(imgStream.ObjectMeta.Name) // TODO: report if deleting fails |
|
264 |
+ imgStream, err = client.ImageStreams("default").Get(imgStream.ObjectMeta.Name) // status is filled in post-create |
|
265 |
+ if err != nil { |
|
266 |
+ logger.Errorf("clRegISCFail", "Getting created test ImageStream failed. Error: (%T) %[1]v", err) |
|
267 |
+ return |
|
268 |
+ } |
|
269 |
+ logger.Debugf("clRegISC", "Created test ImageStream: %[1]v", imgStream) |
|
270 |
+ cacheHost := strings.SplitN(imgStream.Status.DockerImageRepository, "/", 2)[0] |
|
271 |
+ serviceHost := fmt.Sprintf("%s:%d", service.Spec.PortalIP, service.Spec.Ports[0].Port) |
|
272 |
+ if cacheHost != serviceHost { |
|
273 |
+ logger.Errorm("clRegISMismatch", log.Msg{ |
|
274 |
+ "serviceHost": serviceHost, |
|
275 |
+ "cacheHost": cacheHost, |
|
276 |
+ "tmpl": ` |
|
277 |
+Diagnostics created a test ImageStream and compared the registry IP |
|
278 |
+it received to the registry IP available via the docker-registry service. |
|
279 |
+ |
|
280 |
+docker-registry : {{.serviceHost}} |
|
281 |
+ImageStream registry : {{.cacheHost}} |
|
282 |
+ |
|
283 |
+They differ, which probably means that an administrator re-created |
|
284 |
+the docker-registry service but the master has cached the old service |
|
285 |
+IP address. Builds or deployments that use ImageStreams with the wrong |
|
286 |
+docker-registry IP will fail under this condition. |
|
287 |
+ |
|
288 |
+To resolve this issue, restarting the master (to clear the cache) should |
|
289 |
+be sufficient. |
|
290 |
+`}) |
|
291 |
+ } |
|
292 |
+} |
0 | 293 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,104 @@ |
0 |
+package discovery // client |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
5 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
6 |
+ "os" |
|
7 |
+ "os/exec" |
|
8 |
+ "path/filepath" |
|
9 |
+ "runtime" |
|
10 |
+ "strings" |
|
11 |
+) |
|
12 |
+ |
|
13 |
+// ---------------------------------------------------------- |
|
14 |
+// Look for 'osc' and 'openshift' executables |
|
15 |
+func (env *Environment) DiscoverClient() error { |
|
16 |
+ var err error |
|
17 |
+ f := env.Options.ClientDiagOptions.Factory |
|
18 |
+ if config, err := f.OpenShiftClientConfig.RawConfig(); err != nil { |
|
19 |
+ env.Log.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err) |
|
20 |
+ } else { |
|
21 |
+ env.OsConfig = &config |
|
22 |
+ env.FactoryForContext[config.CurrentContext] = f |
|
23 |
+ } |
|
24 |
+ env.Log.Debug("discSearchExec", "Searching for executables in path:\n "+strings.Join(filepath.SplitList(os.Getenv("PATH")), "\n ")) //TODO for non-Linux OS |
|
25 |
+ env.OscPath = env.findExecAndLog("osc") |
|
26 |
+ if env.OscPath != "" { |
|
27 |
+ env.OscVersion, err = getExecVersion(env.OscPath, env.Log) |
|
28 |
+ } |
|
29 |
+ env.OpenshiftPath = env.findExecAndLog("openshift") |
|
30 |
+ if env.OpenshiftPath != "" { |
|
31 |
+ env.OpenshiftVersion, err = getExecVersion(env.OpenshiftPath, env.Log) |
|
32 |
+ } |
|
33 |
+ if env.OpenshiftVersion.NonZero() && env.OscVersion.NonZero() && !env.OpenshiftVersion.Eq(env.OscVersion) { |
|
34 |
+ env.Log.Warnm("discVersionMM", log.Msg{"osV": env.OpenshiftVersion.GoString(), "oscV": env.OscVersion.GoString(), |
|
35 |
+ "text": fmt.Sprintf("'openshift' version %#v does not match 'osc' version %#v; update or remove the lower version", env.OpenshiftVersion, env.OscVersion)}) |
|
36 |
+ } |
|
37 |
+ return err |
|
38 |
+} |
|
39 |
+ |
|
40 |
+// ---------------------------------------------------------- |
|
41 |
+// Look for a specific executable and log what happens |
|
42 |
+func (env *Environment) findExecAndLog(cmd string) string { |
|
43 |
+ if path := findExecFor(cmd); path != "" { |
|
44 |
+ env.Log.Infom("discExecFound", log.Msg{"command": cmd, "path": path, "tmpl": "Found '{{.command}}' at {{.path}}"}) |
|
45 |
+ return path |
|
46 |
+ } else { |
|
47 |
+ env.Log.Warnm("discExecNoPath", log.Msg{"command": cmd, "tmpl": "No '{{.command}}' executable was found in your path"}) |
|
48 |
+ } |
|
49 |
+ return "" |
|
50 |
+} |
|
51 |
+ |
|
52 |
+// ---------------------------------------------------------- |
|
53 |
+// Look in the path for an executable |
|
54 |
// ----------------------------------------------------------
// findExecFor searches the PATH for the named command, additionally trying
// a ".exe" suffix on Windows. Returns "" when nothing is found.
func findExecFor(cmd string) string {
	candidates := []string{cmd}
	if runtime.GOOS == "windows" {
		candidates = append(candidates, cmd+".exe")
	}
	for _, name := range candidates {
		if path, err := exec.LookPath(name); err == nil {
			return path
		}
	}
	return ""
}
|
67 |
+ |
|
68 |
+// ---------------------------------------------------------- |
|
69 |
+// Invoke executable's "version" command to determine version |
|
70 |
// ----------------------------------------------------------
// getExecVersion runs "<path> version" and parses a version of the form
// "name vX.Y.Z" from its combined output. The named return `version` stays
// zero when parsing fails; `err` reflects only a failure to execute the
// command, never a parse failure (which is only logged).
func getExecVersion(path string, logger *log.Logger) (version types.Version, err error) {
	cmd := exec.Command(path, "version")
	var out []byte
	out, err = cmd.CombinedOutput()
	if err == nil {
		var name string
		var x, y, z int
		// NOTE(review): Sscanf's err here shadows the named return, so a parse
		// failure is logged below but a nil error is still returned — confirm
		// this is intended (callers rely on Version.NonZero() instead).
		if scanned, err := fmt.Sscanf(string(out), "%s v%d.%d.%d", &name, &x, &y, &z); scanned > 1 {
			version = types.Version{x, y, z}
			logger.Infom("discVersion", log.Msg{"tmpl": "version of {{.command}} is {{.version}}", "command": name, "version": version.GoString()})
		} else {
			logger.Errorf("discVersErr", `
Expected version output from '%s version'
Could not parse output received:
%v
Error was: %#v`, path, string(out), err)
		}
	} else {
		// Classify the execution failure to produce a more helpful message.
		switch err.(type) {
		case *exec.Error:
			// Could not start the binary at all (missing, not executable, ...).
			logger.Errorf("discVersErr", "error in executing '%v version': %v", path, err)
		case *exec.ExitError:
			// The binary ran but exited non-zero; show a bounded output excerpt.
			logger.Errorf("discVersErr", `
Executed '%v version' which exited with an error code.
This version is likely old or broken.
Error was '%v';
Output was:
%v`, path, err.Error(), log.LimitLines(string(out), 5))
		default:
			logger.Errorf("discVersErr", "executed '%v version' but an error occurred:\n%v\nOutput was:\n%v", path, err, string(out))
		}
	}
	return version, err
}
0 | 104 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,378 @@ |
0 |
+package discovery // config |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
|
5 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd" |
|
6 |
+ clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
7 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" |
|
8 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" |
|
9 |
+ "github.com/openshift/origin/pkg/cmd/cli/config" |
|
10 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
11 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
12 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
13 |
+ "io/ioutil" |
|
14 |
+ "os" |
|
15 |
+ "regexp" |
|
16 |
+ "strings" |
|
17 |
+) |
|
18 |
+ |
|
19 |
+/* ---------------------------------------------------------- |
|
20 |
+Look for the client config and try to read it. |
|
21 |
+ |
|
22 |
+We will look in the standard locations, alert the user to what we find |
|
23 |
+as we go along, and try to be helpful. |
|
24 |
+*/ |
|
25 |
+ |
|
26 |
+// ------------------------------------------------------------- |
|
27 |
+// Look for client config file in a number of possible locations |
|
28 |
// -------------------------------------------------------------
// ReadClientConfigFiles looks for a client config file at the path given by
// flag (or the --config flag of a subcommand, which takes precedence) and at
// the standard locations, records the first one found, and decides whether
// client diagnostics will run (env.WillCheck[ClientTarget]). When nothing is
// found it also probes master-generated admin.kubeconfig locations purely to
// give the user a helpful hint.
func (env *Environment) ReadClientConfigFiles() {
	confFlagName := options.FlagAllClientConfigName
	confFlag := env.Options.ClientConfigPath // from openshift-diagnostics --client-config
	if flags := env.Options.GlobalFlags; flags != nil {
		name := config.OpenShiftConfigFlagName
		if flag := env.Options.GlobalFlags.Lookup(name); flag != nil {
			confFlag = flag.Value.String() // from openshift-diagnostics client --config
			confFlagName = name
		}
	}
	var found bool
	rules := config.NewOpenShiftClientConfigLoadingRules()
	// The flag-specified path is tried first, then the standard precedence
	// locations; every candidate is opened so each can be reported on.
	paths := append([]string{confFlag}, rules.Precedence...)
	for index, path := range paths {
		// errmsg, when non-empty, tells openConfigFile the user explicitly
		// pointed at this path, upgrading open failures from debug to error.
		errmsg := ""
		switch index {
		case 0:
			errmsg = fmt.Sprintf("--"+confFlagName+" specified that client config should be at %s\n", path)
		case len(paths) - 1:
			// do nothing, the config wasn't found in ~
		default:
			if len(os.Getenv(config.OpenShiftConfigPathEnvVar)) != 0 {
				errmsg = fmt.Sprintf("$OPENSHIFTCONFIG specified that client config should be at %s\n", path)
			}
		}

		// Record only the first config found; later candidates are still
		// opened so their problems get logged.
		if rawConfig := openConfigFile(path, errmsg, env.Log); rawConfig != nil && !found {
			found = true
			env.ClientConfigPath = path
			env.ClientConfigRaw = rawConfig
		}
	}
	if found {
		if confFlag != "" && confFlag != env.ClientConfigPath {
			// found config but not where --config said, so don't continue discovery
			env.Log.Errorf("discCCnotFlag", `
The client configuration file was not found where the --%s flag indicated:
  %s
A config file was found at the following location:
  %s
If you wish to use this file for client configuration, you can specify it
with the --%[1]s flag, or just not specify the flag.
`, confFlagName, confFlag, env.ClientConfigPath)
		} else {
			// happy path, client config found as expected
			env.WillCheck[ClientTarget] = true
		}
	} else { // not found, decide what to do
		if confFlag != "" { // user expected conf file at specific place
			env.Log.Errorf("discNoCC", "The client configuration file was not found where --%s='%s' indicated.", confFlagName, confFlag)
		} else if !env.Options.ClientDiagOptions.MustCheck {
			env.Log.Notice("discSkipCLI", "No client config file found; client diagnostics will not be performed.")
		} else {
			// user specifically wants to troubleshoot client, but no conf file given
			env.Log.Warn("discNoCCfile", "No client config file read; OpenShift client diagnostics will use flags and default configuration.")
			env.WillCheck[ClientTarget] = true
			// Standard places the master drops an admin config, checked only to
			// point the user at a usable file.
			adminPaths := []string{
				"/etc/openshift/master/admin.kubeconfig",          // enterprise
				"/openshift.local.config/master/admin.kubeconfig", // origin systemd
				"./openshift.local.config/master/admin.kubeconfig", // origin binary
			}
			adminWarningF := `
No client config file was available; however, one exists at
  %[1]s
which is a standard location where the master generates it.
If this is what you want to use, you should copy it to a standard location
(~/.config/openshift/.config, or the current directory), or you can set the
environment variable OPENSHIFTCONFIG in your ~/.bash_profile:
  export OPENSHIFTCONFIG=%[1]s
If this is not what you want, you should obtain a config file and
place it in a standard location.
`
			// look for it in auto-generated locations when not found properly
			for _, path := range adminPaths {
				if conf := openConfigFile(path, "", env.Log); conf != nil {
					env.Log.Warnf("discCCautoPath", adminWarningF, path)
					break
				}
			}
		}
	}
}
|
110 |
+ |
|
111 |
+// ---------------------------------------------------------- |
|
112 |
+// Attempt to open file at path as client config |
|
113 |
+// If there is a problem and errmsg is set, log an error |
|
114 |
// ----------------------------------------------------------
// openConfigFile attempts to open and parse the file at path as a client
// config, returning nil on any failure. errmsg, when non-empty, explains
// why the user expected the file at this path (flag/env var); open failures
// are then logged as errors rather than debug messages. Note the branch
// order below: the errmsg=="" check must precede the specific error checks.
func openConfigFile(path string, errmsg string, logger *log.Logger) *clientcmdapi.Config {
	var err error
	var file *os.File
	if path == "" { // empty param/envvar
		return nil
	} else if file, err = os.Open(path); err == nil {
		logger.Debugm("discOpenCC", log.Msg{"tmpl": "Reading client config at {{.path}}", "path": path})
	} else if errmsg == "" {
		// Path was only a candidate location, so failure to open is routine.
		logger.Debugf("discOpenCCNo", "Could not read client config at %s:\n%#v", path, err)
	} else if os.IsNotExist(err) {
		logger.Error("discOpenCCNoExist", errmsg+"but that file does not exist.")
	} else if os.IsPermission(err) {
		logger.Error("discOpenCCNoPerm", errmsg+"but lack permission to read that file.")
	} else {
		logger.Errorf("discOpenCCErr", "%sbut there was an error opening it:\n%#v", errmsg, err)
	}
	if file != nil { // it is open for reading
		defer file.Close()
		if buffer, err := ioutil.ReadAll(file); err != nil {
			logger.Errorf("discCCReadErr", "Unexpected error while reading client config file (%s): %v", path, err)
		} else if conf, err := clientcmd.Load(buffer); err != nil {
			logger.Errorf("discCCYamlErr", `
Error reading YAML from client config file (%s):
  %v
This file may have been truncated or mis-edited.
Please fix, remove, or obtain a new client config`, file.Name(), err)
		} else {
			logger.Infom("discCCRead", log.Msg{"tmpl": `Successfully read a client config file at '{{.path}}'`, "path": path})
			/* Note, we're not going to use this config file directly.
			 * Instead, we'll defer to the openshift client code to assimilate
			 * flags, env vars, and the potential hierarchy of config files
			 * into an actual configuration that the client uses.
			 * However, for diagnostic purposes, record the first we find.
			 */
			return conf
		}
	}
	return nil
}
|
153 |
+ |
|
154 |
+/* The full client configuration may specify multiple contexts, each |
|
155 |
+ * of which could be a different server, a different user, a different |
|
156 |
+ * default project. We want to check which contexts have useful access, |
|
157 |
+ * and record those. At this point, we should already have the factory |
|
158 |
+ * for the current context. Factories embed config and a client cache, |
|
159 |
+ * and since we want to do discovery for every available context, we are |
|
160 |
+ * going to create a factory for each context. We will determine which |
|
161 |
+ * context actually has access to the default project, preferring the |
|
162 |
+ * current (default) context if it does. Connection errors should be |
|
163 |
+ * diagnosed along the way. |
|
164 |
+ */ |
|
165 |
+func (env *Environment) ConfigClient() { |
|
166 |
+ if env.OsConfig != nil { |
|
167 |
+ // TODO: run these in parallel, with a time limit so connection timeouts don't take forever |
|
168 |
+ for cname, context := range env.OsConfig.Contexts { |
|
169 |
+ // set context, create factory, see what's available |
|
170 |
+ if env.FactoryForContext[cname] == nil { |
|
171 |
+ //config := clientcmd.NewNonInteractiveClientConfig(env.Factory.OpenShiftClientConfig, cname, &clientcmd.ConfigOverrides{}) |
|
172 |
+ config := clientcmd.NewNonInteractiveClientConfig(*env.OsConfig, cname, &clientcmd.ConfigOverrides{}) |
|
173 |
+ f := osclientcmd.NewFactory(config) |
|
174 |
+ //f.BindFlags(env.Flags.OpenshiftFlags) |
|
175 |
+ env.FactoryForContext[cname] = f |
|
176 |
+ } |
|
177 |
+ if access := getContextAccess(env.FactoryForContext[cname], cname, context, env.Log); access != nil { |
|
178 |
+ env.AccessForContext[cname] = access |
|
179 |
+ if access.ClusterAdmin && (cname == env.OsConfig.CurrentContext || env.ClusterAdminFactory == nil) { |
|
180 |
+ env.ClusterAdminFactory = env.FactoryForContext[cname] |
|
181 |
+ } |
|
182 |
+ } |
|
183 |
+ } |
|
184 |
+ } |
|
185 |
+} |
|
186 |
+ |
|
187 |
+// for now, only try to determine what namespaces a user can see |
|
188 |
+func getContextAccess(factory *osclientcmd.Factory, ctxName string, ctx clientcmdapi.Context, logger *log.Logger) *ContextAccess { |
|
189 |
+ // start by getting ready to log the result |
|
190 |
+ msgText := "Testing client config context {{.context}}\nServer: {{.server}}\nUser: {{.user}}\n\n" |
|
191 |
+ msg := log.Msg{"id": "discCCctx", "tmpl": msgText} |
|
192 |
+ if config, err := factory.OpenShiftClientConfig.RawConfig(); err != nil { |
|
193 |
+ logger.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err) |
|
194 |
+ return nil |
|
195 |
+ } else { |
|
196 |
+ msg["context"] = ctxName |
|
197 |
+ msg["server"] = config.Clusters[ctx.Cluster].Server |
|
198 |
+ msg["user"] = ctx.AuthInfo |
|
199 |
+ } |
|
200 |
+ // actually go and request project list from the server |
|
201 |
+ if osclient, _, err := factory.Clients(); err != nil { |
|
202 |
+ logger.Errorf("discCCctxClients", "Failed to create client during discovery with error:\n(%T) %[1]v\nThis is probably an OpenShift bug.", err) |
|
203 |
+ return nil |
|
204 |
+ } else if projects, err := osclient.Projects().List(labels.Everything(), fields.Everything()); err == nil { // success! |
|
205 |
+ list := projects.Items |
|
206 |
+ if len(list) == 0 { |
|
207 |
+ msg["tmpl"] = msgText + "Successfully requested project list, but it is empty, so user has no access to anything." |
|
208 |
+ msg["projects"] = make([]string, 0) |
|
209 |
+ logger.Infom("discCCctxSuccess", msg) |
|
210 |
+ return nil |
|
211 |
+ } |
|
212 |
+ access := &ContextAccess{Projects: make([]string, len(list))} |
|
213 |
+ for i, project := range list { |
|
214 |
+ access.Projects[i] = project.Name |
|
215 |
+ if project.Name == kapi.NamespaceDefault { |
|
216 |
+ access.ClusterAdmin = true |
|
217 |
+ } |
|
218 |
+ } |
|
219 |
+ if access.ClusterAdmin { |
|
220 |
+ msg["tmpl"] = msgText + "Successfully requested project list; has access to default project, so assumed to be a cluster-admin" |
|
221 |
+ logger.Infom("discCCctxSuccess", msg) |
|
222 |
+ } else { |
|
223 |
+ msg["tmpl"] = msgText + "Successfully requested project list; has access to project(s): {{.projectStr}}" |
|
224 |
+ msg["projects"] = access.Projects |
|
225 |
+ msg["projectStr"] = strings.Join(access.Projects, ", ") |
|
226 |
+ logger.Infom("discCCctxSuccess", msg) |
|
227 |
+ } |
|
228 |
+ return access |
|
229 |
+ } else { // something went wrong, so diagnose it |
|
230 |
+ noResolveRx := regexp.MustCompile("dial tcp: lookup (\\S+): no such host") |
|
231 |
+ unknownCaMsg := "x509: certificate signed by unknown authority" |
|
232 |
+ unneededCaMsg := "specifying a root certificates file with the insecure flag is not allowed" |
|
233 |
+ invalidCertNameRx := regexp.MustCompile("x509: certificate is valid for (\\S+, )+not (\\S+)") |
|
234 |
+ connRefusedRx := regexp.MustCompile("dial tcp (\\S+): connection refused") |
|
235 |
+ connTimeoutRx := regexp.MustCompile("dial tcp (\\S+): (?:connection timed out|i/o timeout)") |
|
236 |
+ unauthenticatedMsg := `403 Forbidden: Forbidden: "/osapi/v1beta1/projects?namespace=" denied by default` |
|
237 |
+ unauthorizedRx := regexp.MustCompile("401 Unauthorized: Unauthorized$") |
|
238 |
+ |
|
239 |
+ malformedHTTPMsg := "malformed HTTP response" |
|
240 |
+ malformedTLSMsg := "tls: oversized record received with length" |
|
241 |
+ |
|
242 |
+ // interpret the error message for mere mortals |
|
243 |
+ errm := err.Error() |
|
244 |
+ var reason, errId string |
|
245 |
+ switch { |
|
246 |
+ case noResolveRx.MatchString(errm): |
|
247 |
+ errId, reason = "clientNoResolve", ` |
|
248 |
+This usually means that the hostname does not resolve to an IP. |
|
249 |
+Hostnames should usually be resolved via DNS or an /etc/hosts file. |
|
250 |
+Ensure that the hostname resolves correctly from your host before proceeding. |
|
251 |
+Of course, your config could also simply have the wrong hostname specified. |
|
252 |
+` |
|
253 |
+ case strings.Contains(errm, unknownCaMsg): |
|
254 |
+ errId, reason = "clientUnknownCa", ` |
|
255 |
+This means that we cannot validate the certificate in use by the |
|
256 |
+OpenShift API server, so we cannot securely communicate with it. |
|
257 |
+Connections could be intercepted and your credentials stolen. |
|
258 |
+ |
|
259 |
+Since the server certificate we see when connecting is not validated |
|
260 |
+by public certificate authorities (CAs), you probably need to specify a |
|
261 |
+certificate from a private CA to validate the connection. |
|
262 |
+ |
|
263 |
+Your config may be specifying the wrong CA cert, or none, or there |
|
264 |
+could actually be a man-in-the-middle attempting to intercept your |
|
265 |
+connection. If you are unconcerned about any of this, you can add the |
|
266 |
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification, |
|
267 |
+but this is risky and should not be necessary. |
|
268 |
+** Connections could be intercepted and your credentials stolen. ** |
|
269 |
+` |
|
270 |
+ case strings.Contains(errm, unneededCaMsg): |
|
271 |
+ errId, reason = "clientUnneededCa", ` |
|
272 |
+This means that for client connections to the OpenShift API server, you |
|
273 |
+(or your kubeconfig) specified both a validating certificate authority |
|
274 |
+and that the client should bypass connection security validation. |
|
275 |
+ |
|
276 |
+This is not allowed because it is likely to be a mistake. |
|
277 |
+ |
|
278 |
+If you want to use --insecure-skip-tls-verify to bypass security (which |
|
279 |
+is usually a bad idea anyway), then you need to also clear the CA cert |
|
280 |
+from your command line options or kubeconfig file(s). Of course, it |
|
281 |
+would be far better to obtain and use a correct CA cert. |
|
282 |
+` |
|
283 |
+ case invalidCertNameRx.MatchString(errm): |
|
284 |
+ match := invalidCertNameRx.FindStringSubmatch(errm) |
|
285 |
+ serverHost := match[len(match)-1] |
|
286 |
+ errId, reason = "clientInvCertName", fmt.Sprintf(` |
|
287 |
+This means that the certificate in use by the OpenShift API server |
|
288 |
+(master) does not match the hostname by which you are addressing it: |
|
289 |
+ %s |
|
290 |
+so a secure connection is not allowed. In theory, this *could* mean that |
|
291 |
+someone is intercepting your connection and presenting a certificate |
|
292 |
+that is valid but for a different server, which is why secure validation |
|
293 |
+fails in this case. |
|
294 |
+ |
|
295 |
+However, the most likely explanation is that the server certificate |
|
296 |
+needs to be updated to include the name you are using to reach it. |
|
297 |
+ |
|
298 |
+If the OpenShift server is generating its own certificates (which |
|
299 |
+is default), then the --public-master flag on the OpenShift master is |
|
300 |
+usually the easiest way to do this. If you need something more complicated |
|
301 |
+(for instance, multiple public addresses for the API, or your own CA), |
|
302 |
+then you will need to custom-generate the server certificate with the |
|
303 |
+right names yourself. |
|
304 |
+ |
|
305 |
+If you are unconcerned about any of this, you can add the |
|
306 |
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification, |
|
307 |
+but this is risky and should not be necessary. |
|
308 |
+** Connections could be intercepted and your credentials stolen. ** |
|
309 |
+`, serverHost) |
|
310 |
+ case connRefusedRx.MatchString(errm): |
|
311 |
+ errId, reason = "clientInvCertName", ` |
|
312 |
+This means that when we tried to connect to the OpenShift API |
|
313 |
+server (master), we reached the host, but nothing accepted the port |
|
314 |
+connection. This could mean that the OpenShift master is stopped, or |
|
315 |
+that a firewall or security policy is blocking access at that port. |
|
316 |
+ |
|
317 |
+You will not be able to connect or do anything at all with OpenShift |
|
318 |
+until this server problem is resolved or you specify a corrected |
|
319 |
+server address.` |
|
320 |
+ case connTimeoutRx.MatchString(errm): |
|
321 |
+ errId, reason = "clientConnTimeout", ` |
|
322 |
+This means that when we tried to connect to the OpenShift API server |
|
323 |
+(master), we could not reach the host at all. |
|
324 |
+* You may have specified the wrong host address. |
|
325 |
+* This could mean the host is completely unavailable (down). |
|
326 |
+* This could indicate a routing problem or a firewall that simply |
|
327 |
+ drops requests rather than responding by reseting the connection. |
|
328 |
+* It does not generally mean that DNS name resolution failed (which |
|
329 |
+ would be a different error) though the problem could be that it |
|
330 |
+ gave the wrong address.` |
|
331 |
+ case strings.Contains(errm, malformedHTTPMsg): |
|
332 |
+ errId, reason = "clientMalformedHTTP", ` |
|
333 |
+This means that when we tried to connect to the OpenShift API server |
|
334 |
+(master) with a plain HTTP connection, the server did not speak |
|
335 |
+HTTP back to us. The most common explanation is that a secure server |
|
336 |
+is listening but you specified an http: connection instead of https:. |
|
337 |
+There could also be another service listening at the intended port |
|
338 |
+speaking some other protocol entirely. |
|
339 |
+ |
|
340 |
+You will not be able to connect or do anything at all with OpenShift |
|
341 |
+until this server problem is resolved or you specify a corrected |
|
342 |
+server address.` |
|
343 |
+ case strings.Contains(errm, malformedTLSMsg): |
|
344 |
+ errId, reason = "clientMalformedTLS", ` |
|
345 |
+This means that when we tried to connect to the OpenShift API server |
|
346 |
+(master) with a secure HTTPS connection, the server did not speak |
|
347 |
+HTTPS back to us. The most common explanation is that the server |
|
348 |
+listening at that port is not the secure server you expected - it |
|
349 |
+may be a non-secure HTTP server or the wrong service may be |
|
350 |
+listening there, or you may have specified an incorrect port. |
|
351 |
+ |
|
352 |
+You will not be able to connect or do anything at all with OpenShift |
|
353 |
+until this server problem is resolved or you specify a corrected |
|
354 |
+server address.` |
|
355 |
+ case strings.Contains(errm, unauthenticatedMsg): |
|
356 |
+ errId, reason = "clientUnauthn", ` |
|
357 |
+This means that when we tried to make a request to the OpenShift API |
|
358 |
+server, your kubeconfig did not present valid credentials to |
|
359 |
+authenticate your client. Credentials generally consist of a client |
|
360 |
+key/certificate or an access token. Your kubeconfig may not have |
|
361 |
+presented any, or they may be invalid.` |
|
362 |
+ case unauthorizedRx.MatchString(errm): |
|
363 |
+ errId, reason = "clientUnauthz", ` |
|
364 |
+This means that when we tried to make a request to the OpenShift API |
|
365 |
+server, the request required credentials that were not presented. |
|
366 |
+This can happen when an authentication token expires. Try logging in |
|
367 |
+with this user again.` |
|
368 |
+ default: |
|
369 |
+ errId, reason = "clientUnknownConnErr", `Diagnostics does not have an explanation for what this means. Please report this error so one can be added.` |
|
370 |
+ } |
|
371 |
+ errMsg := fmt.Sprintf("(%T) %[1]v", err) |
|
372 |
+ msg["tmpl"] = msgText + errMsg + reason |
|
373 |
+ msg["errMsg"] = errMsg |
|
374 |
+ logger.Errorm(errId, msg) |
|
375 |
+ } |
|
376 |
+ return nil |
|
377 |
+} |
0 | 378 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,80 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ kclientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
5 |
+ mconfigapi "github.com/openshift/origin/pkg/cmd/server/api" |
|
6 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
7 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
8 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
9 |
+) |
|
10 |
+ |
|
11 |
// One env instance is created and filled in by discovery.
// Then it should be considered immutable while diagnostics use it.
type Environment struct {
	// the options that were set by command invocation
	Options *options.AllDiagnosticsOptions

	// used to print discovery and diagnostic logs
	Log *log.Logger

	// do we have enough config to diagnose master,node,client?
	WillCheck map[Target]bool

	// general system info
	HasBash      bool                         // for non-Linux clients, will not have bash...
	HasSystemd   bool                         // not even all Linux has systemd
	SystemdUnits map[string]types.SystemdUnit // list of relevant units present on system

	// outcome from looking for executables
	OscPath          string
	OscVersion       types.Version
	OpenshiftPath    string
	OpenshiftVersion types.Version

	// saved results from client discovery
	ClientConfigPath    string                          // first client config file found, if any
	ClientConfigRaw     *kclientcmdapi.Config           // available to analyze ^^
	OsConfig            *kclientcmdapi.Config           // actual merged client configuration
	FactoryForContext   map[string]*osclientcmd.Factory // one for each known context
	AccessForContext    map[string]*ContextAccess       // one for each context that has access to anything
	ClusterAdminFactory *osclientcmd.Factory            // factory we will use for cluster-admin access (could easily be nil)

	// saved results from master discovery
	MasterConfig *mconfigapi.MasterConfig // actual config determined from flags/file

	// saved results from node discovery
	NodeConfig *mconfigapi.NodeConfig // actual config determined from flags/file
}
|
48 |
+ |
|
49 |
// ContextAccess records what a single client-config context was able to
// reach: the projects visible to it, and whether it appears to have
// cluster-admin privileges.
type ContextAccess struct {
	Projects     []string
	ClusterAdmin bool // has access to see stuff only cluster-admin should
}
|
53 |
+ |
|
54 |
+func NewEnvironment(opts *options.AllDiagnosticsOptions, logger *log.Logger) *Environment { |
|
55 |
+ return &Environment{ |
|
56 |
+ Options: opts, |
|
57 |
+ Log: logger, |
|
58 |
+ SystemdUnits: make(map[string]types.SystemdUnit), |
|
59 |
+ WillCheck: make(map[Target]bool), |
|
60 |
+ FactoryForContext: make(map[string]*osclientcmd.Factory), |
|
61 |
+ AccessForContext: make(map[string]*ContextAccess), |
|
62 |
+ } |
|
63 |
+} |
|
64 |
+ |
|
65 |
+// helpful translator |
|
66 |
+func (env *Environment) DefaultFactory() *osclientcmd.Factory { |
|
67 |
+ if env.FactoryForContext != nil && env.OsConfig != nil { // no need to panic if missing... |
|
68 |
+ return env.FactoryForContext[env.OsConfig.CurrentContext] |
|
69 |
+ } |
|
70 |
+ return nil |
|
71 |
+} |
|
72 |
+ |
|
73 |
// Target identifies which OpenShift component a diagnostic check applies to.
type Target string

const (
	ClientTarget Target = "client"
	MasterTarget Target = "master"
	NodeTarget   Target = "node"
)
0 | 80 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,79 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ mconfigapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
5 |
+) |
|
6 |
+ |
|
7 |
// StandardMasterConfPath is the default location checked for a master config
// file when none is specified explicitly.
const StandardMasterConfPath string = "/etc/openshift/master/master-config.yaml"

// DiscoverMaster determines whether a master configuration is available and,
// on success, enables master diagnostics via env.WillCheck[MasterTarget].
// Resolution order:
//   - an explicitly specified --master-config file: it must load, and master
//     checks are enabled only if it does;
//   - otherwise, when the subcommand does not force a master check
//     (options.MustCheck false), the standard location is tried quietly;
//   - otherwise flags are assumed to describe the master and checks are
//     enabled regardless of whether a config could be built.
func (env *Environment) DiscoverMaster() {
	// first, determine if we even have a master config
	options := env.Options.MasterDiagOptions
	if env.Options.MasterConfigPath != "" { // specified master conf, it has to load or we choke
		options.MasterStartOptions.MasterArgs = start.NewDefaultMasterArgs() // and don't set any args
		if env.tryMasterConfig(true) {
			env.WillCheck[MasterTarget] = true
		}
	} else { // user did not indicate config file
		env.Log.Debug("discMCnofile", "No top-level --master-config file specified")
		if !options.MustCheck {
			// general command, user couldn't indicate server flags;
			// look for master config in standard location(s)
			env.tryStandardMasterConfig() // or give up.
		} else { // assume user provided flags like actual master.
			env.tryMasterConfig(true)
			env.WillCheck[MasterTarget] = true // regardless
		}
	}
	if !env.WillCheck[MasterTarget] {
		env.Log.Notice("discMCnone", "No master config found; master diagnostics will not be performed.")
	}
}
|
32 |
+ |
|
33 |
+func (env *Environment) tryMasterConfig(errOnFail bool) bool /* worked? */ { |
|
34 |
+ options := env.Options.MasterDiagOptions.MasterStartOptions |
|
35 |
+ logOnFail := env.Log.Debugf |
|
36 |
+ if errOnFail { |
|
37 |
+ logOnFail = env.Log.Errorf |
|
38 |
+ } |
|
39 |
+ if err := options.Complete(); err != nil { |
|
40 |
+ logOnFail("discMCstart", "Could not read master config options: (%T) %[1]v", err) |
|
41 |
+ return false |
|
42 |
+ } else if err = options.Validate([]string{}); err != nil { |
|
43 |
+ logOnFail("discMCstart", "Could not read master config options: (%T) %[1]v", err) |
|
44 |
+ return false |
|
45 |
+ } |
|
46 |
+ var err error |
|
47 |
+ if path := options.ConfigFile; path != "" { |
|
48 |
+ env.Log.Debugf("discMCfile", "Looking for master config file at '%s'", path) |
|
49 |
+ if env.MasterConfig, err = mconfigapilatest.ReadAndResolveMasterConfig(path); err != nil { |
|
50 |
+ logOnFail("discMCfail", "Could not read master config file '%s':\n(%T) %[2]v", path, err) |
|
51 |
+ return false |
|
52 |
+ } |
|
53 |
+ env.Log.Infof("discMCfound", "Found a master config file:\n%[1]s", path) |
|
54 |
+ return true |
|
55 |
+ } else { |
|
56 |
+ if env.MasterConfig, err = options.MasterArgs.BuildSerializeableMasterConfig(); err != nil { |
|
57 |
+ logOnFail("discMCopts", "Could not build a master config from flags:\n(%T) %[1]v", err) |
|
58 |
+ return false |
|
59 |
+ } |
|
60 |
+ env.Log.Infof("discMCfound", "No master config file, using any flags for configuration.") |
|
61 |
+ } |
|
62 |
+ return false |
|
63 |
+} |
|
64 |
+ |
|
65 |
+func (env *Environment) tryStandardMasterConfig() bool /* worked? */ { |
|
66 |
+ env.Log.Debug("discMCnoflags", "No master config flags specified, will try standard config location") |
|
67 |
+ options := env.Options.MasterDiagOptions.MasterStartOptions |
|
68 |
+ options.ConfigFile = StandardMasterConfPath |
|
69 |
+ options.MasterArgs = start.NewDefaultMasterArgs() |
|
70 |
+ if env.tryMasterConfig(false) { |
|
71 |
+ env.Log.Debug("discMCdefault", "Using master config file at "+StandardMasterConfPath) |
|
72 |
+ env.WillCheck[MasterTarget] = true |
|
73 |
+ return true |
|
74 |
+ } else { // otherwise, we just don't do master diagnostics |
|
75 |
+ env.Log.Debugf("discMCnone", "Not using master config file at "+StandardMasterConfPath+" - will not do master diagnostics.") |
|
76 |
+ } |
|
77 |
+ return false |
|
78 |
+} |
0 | 79 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,80 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ mconfigapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
5 |
+) |
|
6 |
+ |
|
7 |
// StandardNodeConfPath is the default location checked for a node config
// file when none is specified explicitly.
const StandardNodeConfPath string = "/etc/openshift/node/node-config.yaml"

// DiscoverNode determines whether a node configuration is available and,
// on success, enables node diagnostics via env.WillCheck[NodeTarget].
// Resolution mirrors DiscoverMaster: explicit --node-config file first,
// then (unless the subcommand forces a node check) the standard location,
// otherwise node flags with checks enabled regardless.
func (env *Environment) DiscoverNode() {
	// first, determine if we even have a node config
	options := env.Options.NodeDiagOptions
	if env.Options.NodeConfigPath != "" { // specified node conf, it has to load or we choke
		options.NodeStartOptions.NodeArgs = start.NewDefaultNodeArgs() // and don't set any args
		if env.tryNodeConfig(true) {
			env.WillCheck[NodeTarget] = true
		}
	} else { // user did not indicate config file
		env.Log.Debug("discNCnofile", "No node config file specified")
		if !options.MustCheck {
			// general command, user couldn't indicate server flags;
			// look for node config in standard location(s)
			env.tryStandardNodeConfig() // or give up.
		} else { // assume user provided flags like actual node.
			env.tryNodeConfig(true)
			env.WillCheck[NodeTarget] = true // regardless
		}
	}
	if !env.WillCheck[NodeTarget] {
		env.Log.Notice("discNCnone", "No node config found; node diagnostics will not be performed.")
	}
}
|
32 |
+ |
|
33 |
+func (env *Environment) tryNodeConfig(errOnFail bool) bool /* worked */ { |
|
34 |
+ options := env.Options.NodeDiagOptions.NodeStartOptions |
|
35 |
+ //pretty.Println("nodeconfig options are:", options) |
|
36 |
+ logOnFail := env.Log.Debugf |
|
37 |
+ if errOnFail { |
|
38 |
+ logOnFail = env.Log.Errorf |
|
39 |
+ } |
|
40 |
+ if err := options.Complete(); err != nil { |
|
41 |
+ logOnFail("discNCstart", "Could not read node config options: (%T) %[1]v", err) |
|
42 |
+ return false |
|
43 |
+ } else if err = options.Validate([]string{}); err != nil { |
|
44 |
+ logOnFail("discNCstart", "Could not read node config options: (%T) %[1]v", err) |
|
45 |
+ return false |
|
46 |
+ } |
|
47 |
+ var err error |
|
48 |
+ if path := options.ConfigFile; path != "" { |
|
49 |
+ env.Log.Debugf("discNCfile", "Looking for node config file at '%s'", path) |
|
50 |
+ if env.NodeConfig, err = mconfigapilatest.ReadAndResolveNodeConfig(path); err != nil { |
|
51 |
+ logOnFail("discNCfail", "Could not read node config file '%s':\n(%T) %[2]v", path, err) |
|
52 |
+ return false |
|
53 |
+ } |
|
54 |
+ env.Log.Infof("discNCfound", "Found a node config file:\n%[1]s", path) |
|
55 |
+ return true |
|
56 |
+ } else { |
|
57 |
+ if env.NodeConfig, err = options.NodeArgs.BuildSerializeableNodeConfig(); err != nil { |
|
58 |
+ logOnFail("discNCopts", "Could not build a node config from flags:\n(%T) %[1]v", err) |
|
59 |
+ return false |
|
60 |
+ } |
|
61 |
+ env.Log.Infof("discNCfound", "No node config file, using any flags for configuration.") |
|
62 |
+ } |
|
63 |
+ return false |
|
64 |
+} |
|
65 |
+ |
|
66 |
+func (env *Environment) tryStandardNodeConfig() bool /*worked*/ { |
|
67 |
+ env.Log.Debug("discNCnoflags", "No node config flags specified, will try standard config location") |
|
68 |
+ options := env.Options.NodeDiagOptions.NodeStartOptions |
|
69 |
+ options.ConfigFile = StandardNodeConfPath |
|
70 |
+ options.NodeArgs = start.NewDefaultNodeArgs() |
|
71 |
+ if env.tryNodeConfig(false) { |
|
72 |
+ env.Log.Debug("discNCdefault", "Using node config file at "+StandardNodeConfPath) |
|
73 |
+ env.WillCheck[NodeTarget] = true |
|
74 |
+ return true |
|
75 |
+ } else { // otherwise, we just don't do node diagnostics |
|
76 |
+ env.Log.Debugf("discNCnone", "Not using node config file at "+StandardNodeConfPath+" - will not do node diagnostics.") |
|
77 |
+ } |
|
78 |
+ return false |
|
79 |
+} |
0 | 80 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,19 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "os/exec" |
|
4 |
+ "runtime" |
|
5 |
+) |
|
6 |
+ |
|
7 |
+// ---------------------------------------------------------- |
|
8 |
+// Determine what we need to about the OS |
|
9 |
+func (env *Environment) DiscoverOperatingSystem() { |
|
10 |
+ if runtime.GOOS == "linux" { |
|
11 |
+ if _, err := exec.LookPath("systemctl"); err == nil { |
|
12 |
+ env.HasSystemd = true |
|
13 |
+ } |
|
14 |
+ if _, err := exec.LookPath("/bin/bash"); err == nil { |
|
15 |
+ env.HasBash = true |
|
16 |
+ } |
|
17 |
+ } |
|
18 |
+} |
0 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,58 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
5 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
6 |
+ "os/exec" |
|
7 |
+ "strings" |
|
8 |
+) |
|
9 |
+ |
|
10 |
+// ---------------------------------------------------------- |
|
11 |
+// Determine what systemd units are relevant, if any |
|
12 |
+// Run after determining whether systemd and openshift are present. |
|
13 |
+func (env *Environment) DiscoverSystemd() { |
|
14 |
+ env.Log.Notice("discBegin", "Beginning systemd discovery") |
|
15 |
+ for _, name := range []string{"openshift", "openshift-master", "openshift-node", "openshift-sdn-master", "openshift-sdn-node", "docker", "openvswitch", "iptables", "etcd", "kubernetes"} { |
|
16 |
+ if env.SystemdUnits[name] = discoverSystemdUnit(name, env.Log); env.SystemdUnits[name].Exists { |
|
17 |
+ env.Log.Debugm("discUnit", log.Msg{"tmpl": "Saw systemd unit {{.unit}}", "unit": name}) |
|
18 |
+ } |
|
19 |
+ } |
|
20 |
+ env.Log.Debugf("discUnits", "%v", env.SystemdUnits) |
|
21 |
+} |
|
22 |
+ |
|
23 |
+func discoverSystemdUnit(name string, logger *log.Logger) types.SystemdUnit { |
|
24 |
+ unit := types.SystemdUnit{Name: name, Exists: false} |
|
25 |
+ if output, err := exec.Command("systemctl", "show", name).Output(); err != nil { |
|
26 |
+ logger.Errorm("discCtlErr", log.Msg{"tmpl": "Error running `systemctl show {{.unit}}`: {{.error}}\nCannot analyze systemd units.", "unit": name, "error": err.Error()}) |
|
27 |
+ } else { |
|
28 |
+ attr := make(map[string]string) |
|
29 |
+ for _, line := range strings.Split(string(output), "\n") { |
|
30 |
+ elements := strings.SplitN(line, "=", 2) // Looking for "Foo=Bar" settings |
|
31 |
+ if len(elements) == 2 { // found that, record it... |
|
32 |
+ attr[elements[0]] = elements[1] |
|
33 |
+ } |
|
34 |
+ } |
|
35 |
+ if val := attr["LoadState"]; val != "loaded" { |
|
36 |
+ logger.Debugm("discUnitENoExist", log.Msg{"tmpl": "systemd unit '{{.unit}}' does not exist. LoadState is '{{.state}}'", "unit": name, "state": val}) |
|
37 |
+ return unit // doesn't exist - leave everything blank |
|
38 |
+ } else { |
|
39 |
+ unit.Exists = true |
|
40 |
+ } |
|
41 |
+ if val := attr["UnitFileState"]; val == "enabled" { |
|
42 |
+ logger.Debugm("discUnitEnabled", log.Msg{"tmpl": "systemd unit '{{.unit}}' is enabled - it will start automatically at boot.", "unit": name}) |
|
43 |
+ unit.Enabled = true |
|
44 |
+ } else { |
|
45 |
+ logger.Debugm("discUnitNoEnable", log.Msg{"tmpl": "systemd unit '{{.unit}}' is not enabled - it does not start automatically at boot. UnitFileState is '{{.state}}'", "unit": name, "state": val}) |
|
46 |
+ } |
|
47 |
+ if val := attr["ActiveState"]; val == "active" { |
|
48 |
+ logger.Debugm("discUnitActive", log.Msg{"tmpl": "systemd unit '{{.unit}}' is currently running", "unit": name}) |
|
49 |
+ unit.Active = true |
|
50 |
+ } else { |
|
51 |
+ logger.Debugm("discUnitNoActive", log.Msg{"unit": name, "state": val, "exit": unit.ExitStatus, |
|
52 |
+ "tmpl": "systemd unit '{{.unit}}' is not currently running. ActiveState is '{{.state}}'; exit code was {{.exit}}."}) |
|
53 |
+ } |
|
54 |
+ fmt.Sscanf(attr["StatusErrno"], "%d", &unit.ExitStatus) // ignore errors... |
|
55 |
+ } |
|
56 |
+ return unit |
|
57 |
+} |
0 | 58 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,33 @@ |
0 |
+package log |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "encoding/json" |
|
4 |
+ "fmt" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// jsonLogger emits log messages as a single JSON array of message objects
// on out. State flags track whether the array has been opened by a first
// Write and whether Finish has closed it.
type jsonLogger struct {
	out         io.Writer
	logStarted  bool // the opening "[" and at least one message have been written
	logFinished bool // Finish has emitted closing output
}
|
13 |
+ |
|
14 |
+func (j *jsonLogger) Write(l Level, msg Msg) { |
|
15 |
+ if j.logStarted { |
|
16 |
+ fmt.Fprintln(j.out, ",") |
|
17 |
+ } else { |
|
18 |
+ fmt.Fprintln(j.out, "[") |
|
19 |
+ } |
|
20 |
+ j.logStarted = true |
|
21 |
+ msg["level"] = l.Name |
|
22 |
+ b, _ := json.MarshalIndent(msg, " ", " ") |
|
23 |
+ fmt.Print(" " + string(b)) |
|
24 |
+} |
|
25 |
+func (j *jsonLogger) Finish() { |
|
26 |
+ if j.logStarted { |
|
27 |
+ fmt.Fprintln(j.out, "\n]") |
|
28 |
+ } else if !j.logFinished { |
|
29 |
+ fmt.Fprintln(j.out, "[]") |
|
30 |
+ } |
|
31 |
+ j.logFinished = true |
|
32 |
+} |
0 | 33 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,205 @@ |
0 |
+package log |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "bytes" |
|
4 |
+ "errors" |
|
5 |
+ "fmt" |
|
6 |
+ ct "github.com/daviddengcn/go-colortext" |
|
7 |
+ "io" |
|
8 |
+ "strings" |
|
9 |
+ "text/template" |
|
10 |
+) |
|
11 |
+ |
|
12 |
// Level describes a message severity: numeric rank (lower is more severe),
// a short name used in structured output, a text prefix for human-readable
// output, and terminal color attributes.
type Level struct {
	Level  int
	Name   string
	Prefix string
	Color  ct.Color
	Bright bool
}

// Logger filters messages by severity, counts warnings and errors seen,
// and delegates actual formatting to a format-specific backend.
type Logger struct {
	logger       loggerType
	level        Level // minimum severity that will be written
	warningsSeen int
	errorsSeen   int
}

// Internal type to deal with different log formats
type loggerType interface {
	Write(Level, Msg)
	Finish()
}
|
32 |
+ |
|
33 |
+func NewLogger(setLevel int, setFormat string, out io.Writer) (*Logger, error) { |
|
34 |
+ |
|
35 |
+ var logger loggerType |
|
36 |
+ switch setFormat { |
|
37 |
+ case "json": |
|
38 |
+ logger = &jsonLogger{out: out} |
|
39 |
+ case "yaml": |
|
40 |
+ logger = &yamlLogger{out: out} |
|
41 |
+ case "text": |
|
42 |
+ logger = newTextLogger(out) |
|
43 |
+ default: |
|
44 |
+ return nil, errors.New("Output format must be one of: text, json, yaml") |
|
45 |
+ } |
|
46 |
+ |
|
47 |
+ var err error = nil |
|
48 |
+ level := DebugLevel |
|
49 |
+ switch setLevel { |
|
50 |
+ case 0: |
|
51 |
+ level = ErrorLevel |
|
52 |
+ case 1: |
|
53 |
+ level = WarnLevel |
|
54 |
+ case 2: |
|
55 |
+ level = NoticeLevel |
|
56 |
+ case 3: |
|
57 |
+ level = InfoLevel |
|
58 |
+ case 4: |
|
59 |
+ // Debug, also default for invalid numbers below |
|
60 |
+ default: |
|
61 |
+ err = errors.New("Invalid diagnostic level; must be 0-4") |
|
62 |
+ } |
|
63 |
+ return &Logger{ |
|
64 |
+ logger: logger, |
|
65 |
+ level: level, |
|
66 |
+ }, err |
|
67 |
+} |
|
68 |
+ |
|
69 |
// Msg is a map message type to throw type safety and method signatures out the window:
type Msg map[string]interface{}

/* a Msg can be expected to have the following entries:
 * "id": an identifier unique to the message being logged, intended for json/yaml output
 *       so that automation can recognize specific messages without trying to parse them.
 * "text": human-readable message text
 * "tmpl": a template string as understood by text/template that can use any of the other
 *       entries in this Msg as inputs. This is removed, evaluated, and the result is
 *       placed in "text". If there is an error during evaluation, the error is placed
 *       in "templateErr", the original id of the message is stored in "templateId",
 *       and the Msg id is changed to "tmplErr". Of course, this should never happen
 *       if there are no mistakes in the calling code.
 */
|
83 |
+ |
|
84 |
// The five severities, most to least severe; Level numbers index them.
var (
	ErrorLevel  = Level{0, "error", "ERROR: ", ct.Red, true}    // Something is definitely wrong
	WarnLevel   = Level{1, "warn", "WARN:  ", ct.Yellow, true}  // Likely to be an issue but maybe not
	NoticeLevel = Level{2, "note", "[Note] ", ct.White, false}  // Introductory / summary
	InfoLevel   = Level{3, "info", "Info:  ", ct.None, false}   // Just informational
	DebugLevel  = Level{4, "debug", "debug: ", ct.None, false}  // Extra verbose
)
|
91 |
+ |
|
92 |
+// Provide a summary at the end |
|
93 |
+func (l *Logger) Summary() { |
|
94 |
+ l.Notice("summary", "\nSummary of diagnostics execution:\n") |
|
95 |
+ if l.warningsSeen > 0 { |
|
96 |
+ l.Noticem("sumWarn", Msg{"tmpl": "Warnings seen: {{.num}}", "num": l.warningsSeen}) |
|
97 |
+ } |
|
98 |
+ if l.errorsSeen > 0 { |
|
99 |
+ l.Noticem("sumErr", Msg{"tmpl": "Errors seen: {{.num}}", "num": l.errorsSeen}) |
|
100 |
+ } |
|
101 |
+ if l.warningsSeen == 0 && l.errorsSeen == 0 { |
|
102 |
+ l.Notice("sumNone", "Completed with no errors or warnings seen.") |
|
103 |
+ } |
|
104 |
+} |
|
105 |
+ |
|
106 |
+func (l *Logger) Log(level Level, id string, msg Msg) { |
|
107 |
+ if level.Level > l.level.Level { |
|
108 |
+ return |
|
109 |
+ } |
|
110 |
+ msg["id"] = id // TODO: use to retrieve template from elsewhere |
|
111 |
+ // if given a template, convert it to text |
|
112 |
+ if tmpl, exists := msg["tmpl"]; exists { |
|
113 |
+ var buff bytes.Buffer |
|
114 |
+ if tmplString, assertion := tmpl.(string); !assertion { |
|
115 |
+ msg["templateErr"] = fmt.Sprintf("Invalid template type: %T", tmpl) |
|
116 |
+ msg["templateId"] = id |
|
117 |
+ msg["id"] = "tmplErr" |
|
118 |
+ } else { |
|
119 |
+ parsedTmpl, err := template.New(id).Parse(tmplString) |
|
120 |
+ if err != nil { |
|
121 |
+ msg["templateErr"] = err.Error() |
|
122 |
+ msg["templateId"] = id |
|
123 |
+ msg["id"] = "tmplErr" |
|
124 |
+ } else if err = parsedTmpl.Execute(&buff, msg); err != nil { |
|
125 |
+ msg["templateErr"] = err.Error() |
|
126 |
+ msg["templateId"] = id |
|
127 |
+ msg["id"] = "tmplErr" |
|
128 |
+ } else { |
|
129 |
+ msg["text"] = buff.String() |
|
130 |
+ delete(msg, "tmpl") |
|
131 |
+ } |
|
132 |
+ } |
|
133 |
+ } |
|
134 |
+ if level.Level == ErrorLevel.Level { |
|
135 |
+ l.errorsSeen += 1 |
|
136 |
+ } else if level.Level == WarnLevel.Level { |
|
137 |
+ l.warningsSeen += 1 |
|
138 |
+ } |
|
139 |
+ l.logger.Write(level, msg) |
|
140 |
+} |
|
141 |
+ |
|
142 |
// Convenience functions
// Each severity has three forms: X(id, text) logs a plain string,
// Xf(id, format, args...) logs a Sprintf-formatted string, and
// Xm(id, msg) logs a structured Msg (which may carry a "tmpl" template).
func (l *Logger) Error(id string, text string) {
	l.Log(ErrorLevel, id, Msg{"text": text})
}
func (l *Logger) Errorf(id string, msg string, a ...interface{}) {
	l.Error(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Errorm(id string, msg Msg) {
	l.Log(ErrorLevel, id, msg)
}
func (l *Logger) Warn(id string, text string) {
	l.Log(WarnLevel, id, Msg{"text": text})
}
func (l *Logger) Warnf(id string, msg string, a ...interface{}) {
	l.Warn(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Warnm(id string, msg Msg) {
	l.Log(WarnLevel, id, msg)
}
func (l *Logger) Info(id string, text string) {
	l.Log(InfoLevel, id, Msg{"text": text})
}
func (l *Logger) Infof(id string, msg string, a ...interface{}) {
	l.Info(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Infom(id string, msg Msg) {
	l.Log(InfoLevel, id, msg)
}
func (l *Logger) Notice(id string, text string) {
	l.Log(NoticeLevel, id, Msg{"text": text})
}
func (l *Logger) Noticef(id string, msg string, a ...interface{}) {
	l.Notice(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Noticem(id string, msg Msg) {
	l.Log(NoticeLevel, id, msg)
}
func (l *Logger) Debug(id string, text string) {
	l.Log(DebugLevel, id, Msg{"text": text})
}
func (l *Logger) Debugf(id string, msg string, a ...interface{}) {
	l.Debug(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Debugm(id string, msg Msg) {
	l.Log(DebugLevel, id, msg)
}
|
188 |
+ |
|
189 |
// Finish tells the format backend to emit any closing output
// (e.g. closing the JSON array).
func (l *Logger) Finish() {
	l.logger.Finish()
}
|
192 |
+ |
|
193 |
// ErrorsSeen reports whether any error-level messages have been logged.
func (l *Logger) ErrorsSeen() bool {
	return l.errorsSeen > 0
}
|
196 |
+ |
|
197 |
// LimitLines truncates msg to at most n lines; when more are present, the
// n-th line and everything after it are replaced by "[...]".
func LimitLines(msg string, n int) string {
	lines := strings.SplitN(msg, "\n", n+1)
	if len(lines) > n { // SplitN yields at most n+1 pieces, so this means overflow
		lines[n] = "[...]"
	}
	return strings.Join(lines, "\n")
}
0 | 205 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,53 @@ |
0 |
+package log |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ ct "github.com/daviddengcn/go-colortext" |
|
5 |
+ "github.com/docker/docker/pkg/term" |
|
6 |
+ "io" |
|
7 |
+ "os" |
|
8 |
+ "strings" |
|
9 |
+) |
|
10 |
+ |
|
11 |
// textLogger renders log messages as human-readable text,
// colorized when the destination is a terminal.
type textLogger struct {
	out         io.Writer
	ttyOutput   bool // usually want color; but do not output colors to non-tty
	lastNewline bool // keep track of newline separation
}

// newTextLogger builds a textLogger for the given writer, enabling
// colored output only when the writer is an actual terminal.
func newTextLogger(out io.Writer) *textLogger {
	logger := &textLogger{out: out, lastNewline: true}

	if IsTerminal(out) {
		// only want color sequences to humans, not redirected output (logs, "less", etc.)
		logger.ttyOutput = true
	}
	return logger
}
|
26 |
+ |
|
27 |
+// cribbed a la "github.com/openshift/origin/pkg/cmd/util" |
|
28 |
+func IsTerminal(w io.Writer) bool { |
|
29 |
+ file, ok := w.(*os.File) |
|
30 |
+ return ok && term.IsTerminal(file.Fd()) |
|
31 |
+} |
|
32 |
+ |
|
33 |
// Write renders one log message as (optionally colorized) text. Multi-line
// messages are separated from neighboring output by blank lines and their
// continuation lines are indented under the level prefix; the lastNewline
// flag tracks whether the previous message already ended in a blank line.
func (t *textLogger) Write(l Level, msg Msg) {
	if t.ttyOutput {
		ct.ChangeColor(l.Color, l.Bright, ct.None, false)
	}
	// only the "text" entry of the message map is rendered here
	text := strings.TrimSpace(fmt.Sprintf("%v", msg["text"]))
	if strings.Contains(text, "\n") { // separate multiline comments with newlines
		if !t.lastNewline {
			fmt.Fprintln(t.out) // separate from previous one-line log msg
		}
		text = text + "\n" // leave a blank line after the multi-line message
		t.lastNewline = true
	} else {
		t.lastNewline = false
	}
	fmt.Fprintln(t.out, l.Prefix+strings.Replace(text, "\n", "\n ", -1))
	if t.ttyOutput {
		ct.ResetColor()
	}
}

// Finish is a no-op: text output is written immediately, nothing is buffered.
func (t *textLogger) Finish() {}
0 | 53 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,19 @@ |
0 |
+package log |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ "gopkg.in/yaml.v2" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// yamlLogger writes each log message as its own "---"-separated YAML document.
type yamlLogger struct {
	out        io.Writer
	logStarted bool // NOTE(review): never read or set in this file — confirm whether it is needed
}

// Write adds the level name to the message map and emits it as a YAML document.
func (y *yamlLogger) Write(l Level, msg Msg) {
	// NOTE(review): this mutates the caller's Msg map — confirm callers do not reuse it
	msg["level"] = l.Name
	// NOTE(review): marshal errors are silently dropped — TODO confirm acceptable
	b, _ := yaml.Marshal(&msg)
	fmt.Fprintln(y.out, "---\n"+string(b))
}

// Finish is a no-op: YAML documents are written immediately, nothing is buffered.
func (y *yamlLogger) Finish() {}
0 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,129 @@ |
0 |
+package run |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
5 |
+ "github.com/openshift/origin/pkg/diagnostics/client" |
|
6 |
+ "github.com/openshift/origin/pkg/diagnostics/discovery" |
|
7 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
8 |
+ "github.com/openshift/origin/pkg/diagnostics/systemd" |
|
9 |
+ "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
10 |
+ "os" |
|
11 |
+ "strings" |
|
12 |
+) |
|
13 |
+ |
|
14 |
// Diagnose is the top-level entry point for running diagnostics: it runs
// discovery against the current host/configuration, executes the relevant
// diagnostics (all available, or only those the user named as "area.name"),
// prints a summary, and exits with status 255 if any diagnostic logged an error.
func Diagnose(opts *options.AllDiagnosticsOptions) {
	// start output to a log
	dopts := opts.DiagOptions
	// NOTE(review): the error from NewLogger is discarded, so an invalid
	// level/format/output option would go unnoticed here — TODO confirm intended.
	logger, _ := log.NewLogger(dopts.DiagLevel, dopts.DiagFormat, dopts.Output.Get())
	// start discovery
	if env := RunDiscovery(opts, logger); env != nil { // discovery result can veto continuing
		// map of area name ("client", "systemd") to that area's available diagnostics
		allDiags := make(map[string]map[string]diagnostic.Diagnostic)
		// now we will figure out what diagnostics to run based on discovery.
		for area := range env.WillCheck {
			switch area {
			case discovery.ClientTarget:
				allDiags["client"] = client.Diagnostics
			case discovery.MasterTarget, discovery.NodeTarget:
				allDiags["systemd"] = systemd.Diagnostics
			}
		}
		if list := opts.DiagOptions.Diagnostics; len(*list) > 0 {
			// just run a specific (set of) diagnostic(s), named "area.name"
			for _, arg := range *list {
				parts := strings.SplitN(arg, ".", 2)
				if len(parts) < 2 { // no "." separator, cannot be a valid name
					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
					continue
				}
				area, name := parts[0], parts[1]
				if diagnostics, exists := allDiags[area]; !exists {
					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
				} else if diag, exists := diagnostics[name]; !exists {
					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
				} else {
					RunDiagnostic(area, name, diag, env)
				}
			}
		} else {
			// TODO: run all of these in parallel but ensure sane output
			for area, diagnostics := range allDiags {
				for name, diag := range diagnostics {
					RunDiagnostic(area, name, diag, env)
				}
			}
		}
	}
	logger.Summary()
	logger.Finish()
	// non-zero exit signals that at least one diagnostic reported an error
	if logger.ErrorsSeen() {
		os.Exit(255)
	}
}
|
62 |
+ |
|
63 |
// ----------------------------------------------------------
// Examine system and return findings in an Environment
//
// RunDiscovery inspects the host for OpenShift client/master/node
// configuration, guided by which option sets are present, and records the
// results in a discovery.Environment. It returns nil — vetoing any further
// diagnostics — when no OpenShift configuration can be found at all.
func RunDiscovery(adOpts *options.AllDiagnosticsOptions, logger *log.Logger) *discovery.Environment {
	logger.Notice("discBegin", "Beginning discovery of environment")
	env := discovery.NewEnvironment(adOpts, logger)
	env.DiscoverOperatingSystem()
	// systemd units are only relevant when diagnosing a master or node host
	if adOpts.MasterDiagOptions != nil || adOpts.NodeDiagOptions != nil {
		env.DiscoverSystemd()
	}
	if mdOpts := adOpts.MasterDiagOptions; mdOpts != nil {
		if mdOpts.MasterStartOptions == nil {
			mdOpts.MasterStartOptions = &start.MasterOptions{ConfigFile: adOpts.MasterConfigPath}
			// leaving MasterArgs nil signals it has to be a master config file or nothing.
		} else if adOpts.MasterConfigPath != "" {
			// an explicitly specified config path overrides existing start options
			mdOpts.MasterStartOptions.ConfigFile = adOpts.MasterConfigPath
		}
		env.DiscoverMaster()
	}
	if ndOpts := adOpts.NodeDiagOptions; ndOpts != nil {
		if ndOpts.NodeStartOptions == nil {
			ndOpts.NodeStartOptions = &start.NodeOptions{ConfigFile: adOpts.NodeConfigPath}
			// no NodeArgs signals it has to be a node config file or nothing.
		} else if adOpts.NodeConfigPath != "" {
			ndOpts.NodeStartOptions.ConfigFile = adOpts.NodeConfigPath
		}
		env.DiscoverNode()
	}
	if cdOpts := adOpts.ClientDiagOptions; cdOpts != nil {
		env.DiscoverClient()
		env.ReadClientConfigFiles() // so user knows where config is coming from (or not)
		env.ConfigClient()
	}
	// if discovery turned up nothing checkable at all, veto running diagnostics
	checkAny := false
	for _, check := range env.WillCheck {
		checkAny = checkAny || check
	}
	if !checkAny {
		logger.Error("discNoChecks", "Cannot find any OpenShift configuration. Please specify which component or configuration you wish to troubleshoot.")
		return nil
	}
	return env
}
|
105 |
+ |
|
106 |
// RunDiagnostic executes one diagnostic, first consulting its optional
// Condition to decide whether it should be skipped (logging why, when a
// reason is given). Panics raised by a diagnostic are recovered and logged
// so the remaining diagnostics can still run.
func RunDiagnostic(area string, name string, diag diagnostic.Diagnostic, env *discovery.Environment) {
	defer func() {
		// recover from diagnostics that panic so others can still run
		if r := recover(); r != nil {
			// NOTE(review): %+v prints the panic value, not an actual stack trace
			env.Log.Errorf("diagPanic", "Diagnostic '%s' crashed; this is usually a bug in either diagnostics or OpenShift. Stack trace:\n%+v", name, r)
		}
	}()
	if diag.Condition != nil {
		if skip, reason := diag.Condition(env); skip {
			if reason == "" {
				env.Log.Noticem("diagSkip", log.Msg{"area": area, "name": name, "diag": diag.Description,
					"tmpl": "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}"})
			} else {
				env.Log.Noticem("diagSkip", log.Msg{"area": area, "name": name, "diag": diag.Description, "reason": reason,
					"tmpl": "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}"})
			}
			return
		}
	}
	env.Log.Noticem("diagRun", log.Msg{"area": area, "name": name, "diag": diag.Description,
		"tmpl": "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}"})
	diag.Run(env)
}
0 | 129 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,514 @@ |
0 |
+package systemd |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "bufio" |
|
4 |
+ "encoding/json" |
|
5 |
+ "fmt" |
|
6 |
+ "github.com/openshift/origin/pkg/diagnostics/discovery" |
|
7 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
8 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
9 |
+ "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
10 |
+ "io" |
|
11 |
+ "os/exec" |
|
12 |
+ "regexp" |
|
13 |
+) |
|
14 |
+ |
|
15 |
// logEntry is the subset of a journald JSON record (journalctl --output=json)
// that log analysis examines.
type logEntry struct {
	// Message holds the log text, taken from journald's MESSAGE field.
	// The explicit tag makes the mapping deliberate instead of relying on
	// encoding/json's case-insensitive fallback from "MESSAGE" to "Message".
	// I feel certain we will want more fields at some point
	Message string `json:"MESSAGE"`
}
18 |
+ |
|
19 |
// logMatcher is a regex for scanning log messages and interpreting them when found.
type logMatcher struct {
	Regexp         *regexp.Regexp
	Level          log.Level
	Id             string
	Interpretation string // log with above level+id if it's simple
	KeepAfterMatch bool   // usually note only first matched entry, ignore rest
	// Interpret, when non-nil, runs custom logic on a match instead of
	// logging Interpretation; its return value replaces KeepAfterMatch.
	Interpret func(
		env *discovery.Environment,
		entry *logEntry,
		matches []string, // capture groups from Regexp
	) bool // KeepAfterMatch?
}
|
31 |
+ |
|
32 |
// unitSpec describes a systemd unit whose journal we know how to analyze:
// how to recognize the unit's most recent startup message, and which
// suspect log patterns to look for in entries since that startup.
type unitSpec struct {
	Name        string
	StartMatch  *regexp.Regexp // regex to look for in log messages indicating startup
	LogMatchers []logMatcher   // suspect log patterns to check for - checked in order
}
|
37 |
+ |
|
38 |
+// |
|
39 |
+// -------- Things that feed into the diagnostics definitions ----------- |
|
40 |
+// Search for Diagnostics for the actual diagnostics. |
|
41 |
+ |
|
42 |
+// Reusable log matchers: |
|
43 |
+var badImageTemplate = logMatcher{ |
|
44 |
+ Regexp: regexp.MustCompile(`Unable to find an image for .* due to an error processing the format: %!v\\(MISSING\\)`), |
|
45 |
+ Level: log.InfoLevel, |
|
46 |
+ Interpretation: ` |
|
47 |
+This error indicates openshift was given the flag --images including an invalid format variable. |
|
48 |
+Valid formats can include (literally) ${component} and ${version}. |
|
49 |
+This could be a typo or you might be intending to hardcode something, |
|
50 |
+such as a version which should be specified as e.g. v3.0, not ${v3.0}. |
|
51 |
+Note that the --images flag may be supplied via the OpenShift master, |
|
52 |
+node, or "openshift ex registry/router" invocations and should usually |
|
53 |
+be the same for each.`, |
|
54 |
+} |
|
55 |
+ |
|
56 |
+// captures for logMatcher Interpret functions to store state between matches |
|
57 |
+var tlsClientErrorSeen map[string]bool |
|
58 |
+ |
|
59 |
+// Specify what units we can check and what to look for and say about it |
|
60 |
+var unitLogSpecs = []*unitSpec{ |
|
61 |
+ { |
|
62 |
+ Name: "openshift-master", |
|
63 |
+ StartMatch: regexp.MustCompile("Starting an OpenShift master"), |
|
64 |
+ LogMatchers: []logMatcher{ |
|
65 |
+ badImageTemplate, |
|
66 |
+ { |
|
67 |
+ Regexp: regexp.MustCompile("Unable to decode an event from the watch stream: local error: unexpected message"), |
|
68 |
+ Level: log.InfoLevel, |
|
69 |
+ Id: "sdLogOMIgnore", |
|
70 |
+ Interpretation: "You can safely ignore this message.", |
|
71 |
+ }, |
|
72 |
+ { |
|
73 |
+ Regexp: regexp.MustCompile("HTTP probe error: Get .*/healthz: dial tcp .*:10250: connection refused"), |
|
74 |
+ Level: log.InfoLevel, |
|
75 |
+ Id: "sdLogOMhzRef", |
|
76 |
+ Interpretation: ` |
|
77 |
+The OpenShift master does a health check on nodes that are defined in |
|
78 |
+its records, and this is the result when the node is not available yet. |
|
79 |
+Since the master records are typically created before the node is |
|
80 |
+available, this is not usually a problem, unless it continues in the |
|
81 |
+logs after the node is actually available.`, |
|
82 |
+ }, |
|
83 |
+ { |
|
84 |
+ // TODO: don't rely on ipv4 format, should be ipv6 "soon" |
|
85 |
+ Regexp: regexp.MustCompile("http: TLS handshake error from ([\\d.]+):\\d+: remote error: bad certificate"), |
|
86 |
+ Level: log.WarnLevel, |
|
87 |
+ Interpret: func(env *discovery.Environment, entry *logEntry, matches []string) bool { |
|
88 |
+ client := matches[1] |
|
89 |
+ prelude := fmt.Sprintf("Found 'openshift-master' journald log message:\n %s\n", entry.Message) |
|
90 |
+ if tlsClientErrorSeen == nil { // first time this message was seen |
|
91 |
+ tlsClientErrorSeen = map[string]bool{client: true} |
|
92 |
+ // TODO: too generic, adjust message depending on subnet of the "from" address |
|
93 |
+ env.Log.Warnm("sdLogOMreBadCert", log.Msg{"client": client, "text": prelude + ` |
|
94 |
+This error indicates that a client attempted to connect to the master |
|
95 |
+HTTPS API server but broke off the connection because the master's |
|
96 |
+certificate is not validated by a cerificate authority (CA) acceptable |
|
97 |
+to the client. There are a number of ways this can occur, some more |
|
98 |
+problematic than others. |
|
99 |
+ |
|
100 |
+At this time, the OpenShift master certificate is signed by a private CA |
|
101 |
+(created the first time the master runs) and clients should have a copy of |
|
102 |
+that CA certificate in order to validate connections to the master. Most |
|
103 |
+likely, either: |
|
104 |
+1. the master has generated a new CA (after the administrator deleted |
|
105 |
+ the old one) and the client has a copy of the old CA cert, or |
|
106 |
+2. the client hasn't been configured with a private CA at all (or the |
|
107 |
+ wrong one), or |
|
108 |
+3. the client is attempting to reach the master at a URL that isn't |
|
109 |
+ covered by the master's server certificate, e.g. a public-facing |
|
110 |
+ name or IP that isn't known to the master automatically; this may |
|
111 |
+ need to be specified with the --public-master flag on the master |
|
112 |
+ in order to generate a new server certificate including it. |
|
113 |
+ |
|
114 |
+Clients of the master may include users, nodes, and infrastructure |
|
115 |
+components running as containers. Check the "from" IP address in the |
|
116 |
+log message: |
|
117 |
+* If it is from a SDN IP, it is likely from an infrastructure |
|
118 |
+ component. Check pod logs and recreate it with the correct CA cert. |
|
119 |
+ Routers and registries won't work properly with the wrong CA. |
|
120 |
+* If it is from a node IP, the client is likely a node. Check the |
|
121 |
+ openshift-node and openshift-sdn-node logs and reconfigure with the |
|
122 |
+ correct CA cert. Nodes will be unable to create pods until this is |
|
123 |
+ corrected. |
|
124 |
+* If it is from an external IP, it is likely from a user (CLI, browser, |
|
125 |
+ etc.). osc and openshift clients should be configured with the correct |
|
126 |
+ CA cert; browsers can also add CA certs but it is usually easier |
|
127 |
+ to just have them accept the server certificate on the first visit |
|
128 |
+ (so this message may simply indicate that the master generated a new |
|
129 |
+ server certificate, e.g. to add a different --public-master, and a |
|
130 |
+ browser hasn't accepted it yet and is still attempting API calls; |
|
131 |
+ try logging out of the console and back in again).`}) |
|
132 |
+ } else if !tlsClientErrorSeen[client] { |
|
133 |
+ tlsClientErrorSeen[client] = true |
|
134 |
+ env.Log.Warnm("sdLogOMreBadCert", log.Msg{"client": client, "text": prelude + |
|
135 |
+ `This message was diagnosed above, but for a different client address.`}) |
|
136 |
+ } // else, it's a repeat, don't mention it |
|
137 |
+ return true // show once for every client failing to connect, not just the first |
|
138 |
+ }, |
|
139 |
+ }, |
|
140 |
+ { |
|
141 |
+ // user &{system:anonymous [system:unauthenticated]} -> /api/v1beta1/services?namespace=" |
|
142 |
+ Regexp: regexp.MustCompile("system:anonymous\\W*system:unauthenticated\\W*/api/v1beta1/services\\?namespace="), |
|
143 |
+ Level: log.WarnLevel, |
|
144 |
+ Id: "sdLogOMunauthNode", |
|
145 |
+ Interpretation: ` |
|
146 |
+This indicates the OpenShift API server (master) received an unscoped |
|
147 |
+request to get Services. Requests like this probably come from an |
|
148 |
+OpenShift node trying to discover where it should proxy services. |
|
149 |
+ |
|
150 |
+However, the request was unauthenticated, so it was denied. The node |
|
151 |
+either did not offer a client certificate for credential, or offered an |
|
152 |
+invalid one (not signed by the certificate authority the master uses). |
|
153 |
+The node will not be able to function without this access. |
|
154 |
+ |
|
155 |
+Unfortunately, this message does not tell us *which* node is the |
|
156 |
+problem. But running diagnostics on your node hosts should find a log |
|
157 |
+message for any node with this problem. |
|
158 |
+`, |
|
159 |
+ }, |
|
160 |
+ }, |
|
161 |
+ }, |
|
162 |
+ { |
|
163 |
+ Name: "openshift-sdn-master", |
|
164 |
+ StartMatch: regexp.MustCompile("Starting OpenShift SDN Master"), |
|
165 |
+ LogMatchers: []logMatcher{}, |
|
166 |
+ }, |
|
167 |
+ { |
|
168 |
+ Name: "openshift-node", |
|
169 |
+ StartMatch: regexp.MustCompile("Starting an OpenShift node"), |
|
170 |
+ LogMatchers: []logMatcher{ |
|
171 |
+ badImageTemplate, |
|
172 |
+ { |
|
173 |
+ Regexp: regexp.MustCompile(`error updating node status, will retry:.*system:(\S+) cannot get on minions with name "(\S+)" in default|Failed to list .*Forbidden: "\S+" system:node-\S+ cannot list on (pods|services) in`), |
|
174 |
+ Level: log.ErrorLevel, |
|
175 |
+ Id: "sdLogONnodePerm", |
|
176 |
+ Interpretation: ` |
|
177 |
+openshift-node lacks the permission to update the node's status or request |
|
178 |
+its responsibilities from the OpenShift master API. This host will not |
|
179 |
+function as a node until this is resolved. Pods scheduled for this node |
|
180 |
+will remain in pending or unknown state forever. |
|
181 |
+ |
|
182 |
+This probably indicates a problem with policy as node credentials in beta3 |
|
183 |
+allow access to anything (later, they will be constrained only to pods |
|
184 |
+that belong to them). This message indicates that the node credentials |
|
185 |
+are authenticated, but not authorized for the necessary access. |
|
186 |
+ |
|
187 |
+One way to encounter this is to start the master with data from an older |
|
188 |
+installation (e.g. beta2) in etcd. The default startup will not update |
|
189 |
+existing policy to allow node access as they would have if starting with |
|
190 |
+an empty etcd. In this case, the following command (as admin): |
|
191 |
+ |
|
192 |
+ osc get rolebindings -n master |
|
193 |
+ |
|
194 |
+... should show group system:nodes has the master/system:component role. |
|
195 |
+If that is missing, you may wish to rewrite the bootstrap policy with: |
|
196 |
+ |
|
197 |
+ POLICY=/var/lib/openshift/openshift.local.policy/policy.json |
|
198 |
+ CONF=/etc/openshift/master.yaml |
|
199 |
+ openshift admin overwrite-policy --filename=$POLICY --master-config=$CONF |
|
200 |
+ |
|
201 |
+If that is not the problem, then it may be that access controls on nodes |
|
202 |
+have been put in place and are blocking this request; check the error |
|
203 |
+message to see whether the node is attempting to use the wrong node name. |
|
204 |
+`, |
|
205 |
+ }, |
|
206 |
+ { |
|
207 |
+ Regexp: regexp.MustCompile("Unable to load services: Get (http\\S+/api/v1beta1/services\\?namespace=): (.+)"), // e.g. x509: certificate signed by unknown authority |
|
208 |
+ Level: log.ErrorLevel, |
|
209 |
+ Id: "sdLogONconnMaster", |
|
210 |
+ Interpretation: ` |
|
211 |
+openshift-node could not connect to the OpenShift master API in order |
|
212 |
+to determine its responsibilities. This host will not function as a node |
|
213 |
+until this is resolved. Pods scheduled for this node will remain in |
|
214 |
+pending or unknown state forever.`, |
|
215 |
+ }, |
|
216 |
+ { |
|
217 |
+ Regexp: regexp.MustCompile(`Unable to load services: request.*403 Forbidden: Forbidden: "/api/v1beta1/services\?namespace=" denied by default`), |
|
218 |
+ Level: log.ErrorLevel, |
|
219 |
+ Id: "sdLogONMasterForbids", |
|
220 |
+ Interpretation: ` |
|
221 |
+openshift-node could not connect to the OpenShift master API to determine |
|
222 |
+its responsibilities because it lacks the proper credentials. Nodes |
|
223 |
+should specify a client certificate in order to identify themselves to |
|
224 |
+the master. This message typically means that either no client key/cert |
|
225 |
+was supplied, or it is not validated by the certificate authority (CA) |
|
226 |
+the master uses. You should supply a correct client key and certificate |
|
227 |
+to the .kubeconfig specified in /etc/sysconfig/openshift-node |
|
228 |
+ |
|
229 |
+This host will not function as a node until this is resolved. Pods |
|
230 |
+scheduled for this node will remain in pending or unknown state forever.`, |
|
231 |
+ }, |
|
232 |
+ }, |
|
233 |
+ }, |
|
234 |
+ { |
|
235 |
+ Name: "openshift-sdn-node", |
|
236 |
+ StartMatch: regexp.MustCompile("Starting OpenShift SDN node"), |
|
237 |
+ LogMatchers: []logMatcher{ |
|
238 |
+ { |
|
239 |
+ Regexp: regexp.MustCompile("Could not find an allocated subnet for this minion.*Waiting.."), |
|
240 |
+ Level: log.WarnLevel, |
|
241 |
+ Id: "sdLogOSNnoSubnet", |
|
242 |
+ Interpretation: ` |
|
243 |
+This warning occurs when openshift-sdn-node is trying to request the |
|
244 |
+SDN subnet it should be configured with according to openshift-sdn-master, |
|
245 |
+but either can't connect to it ("All the given peers are not reachable") |
|
246 |
+or has not yet been assigned a subnet ("Key not found"). |
|
247 |
+ |
|
248 |
+This can just be a matter of waiting for the master to become fully |
|
249 |
+available and define a record for the node (aka "minion") to use, |
|
250 |
+and openshift-sdn-node will wait until that occurs, so the presence |
|
251 |
+of this message in the node log isn't necessarily a problem as |
|
252 |
+long as the SDN is actually working, but this message may help indicate |
|
253 |
+the problem if it is not working. |
|
254 |
+ |
|
255 |
+If the master is available and this node's record is defined and this |
|
256 |
+message persists, then it may be a sign of a different misconfiguration. |
|
257 |
+Unfortunately the message is not specific about why the connection failed. |
|
258 |
+Check MASTER_URL in /etc/sysconfig/openshift-sdn-node: |
|
259 |
+ * Is the protocol https? It should be http. |
|
260 |
+ * Can you reach the address and port from the node using curl? |
|
261 |
+ ("404 page not found" is correct response)`, |
|
262 |
+ }, |
|
263 |
+ }, |
|
264 |
+ }, |
|
265 |
+ { |
|
266 |
+ Name: "docker", |
|
267 |
+ StartMatch: regexp.MustCompile(`Starting Docker Application Container Engine.`), // RHEL Docker at least |
|
268 |
+ LogMatchers: []logMatcher{ |
|
269 |
+ { |
|
270 |
+ Regexp: regexp.MustCompile(`Usage: docker \\[OPTIONS\\] COMMAND`), |
|
271 |
+ Level: log.ErrorLevel, |
|
272 |
+ Id: "sdLogDbadOpt", |
|
273 |
+ Interpretation: ` |
|
274 |
+This indicates that docker failed to parse its command line |
|
275 |
+successfully, so it just printed a standard usage message and exited. |
|
276 |
+Its command line is built from variables in /etc/sysconfig/docker |
|
277 |
+(which may be overridden by variables in /etc/sysconfig/openshift-sdn-node) |
|
278 |
+so check there for problems. |
|
279 |
+ |
|
280 |
+The OpenShift node will not work on this host until this is resolved.`, |
|
281 |
+ }, |
|
282 |
+ { |
|
283 |
+ Regexp: regexp.MustCompile(`^Unable to open the database file: unable to open database file$`), |
|
284 |
+ Level: log.ErrorLevel, |
|
285 |
+ Id: "sdLogDopenDB", |
|
286 |
+ Interpretation: ` |
|
287 |
+This indicates that docker failed to record its state to its database. |
|
288 |
+The most likely reason is that it is out of disk space. It is also |
|
289 |
+possible for other device or permissions problems to be at fault. |
|
290 |
+ |
|
291 |
+Sometimes this is due to excess completed containers not being cleaned |
|
292 |
+up. You can delete all completed containers with this command (running |
|
293 |
+containers will not be deleted): |
|
294 |
+ |
|
295 |
+ # docker rm $(docker ps -qa) |
|
296 |
+ |
|
297 |
+Whatever the reason, docker will not function in this state. |
|
298 |
+The OpenShift node will not work on this host until this is resolved.`, |
|
299 |
+ }, |
|
300 |
+ { |
|
301 |
+ Regexp: regexp.MustCompile(`no space left on device$`), |
|
302 |
+ Level: log.ErrorLevel, |
|
303 |
+ Id: "sdLogDfull", |
|
304 |
+ Interpretation: ` |
|
305 |
+This indicates that docker has run out of space for container volumes |
|
306 |
+or metadata (by default, stored in /var/lib/docker, but configurable). |
|
307 |
+ |
|
308 |
+docker will not function in this state. It requires that disk space be |
|
309 |
+added to the relevant filesystem or files deleted to make space. |
|
310 |
+Sometimes this is due to excess completed containers not being cleaned |
|
311 |
+up. You can delete all completed containers with this command (running |
|
312 |
+containers will not be deleted): |
|
313 |
+ |
|
314 |
+ # docker rm $(docker ps -qa) |
|
315 |
+ |
|
316 |
+The OpenShift node will not work on this host until this is resolved.`, |
|
317 |
+ }, |
|
318 |
+ { // generic error seen - do this last |
|
319 |
+ Regexp: regexp.MustCompile(`\\slevel="fatal"\\s`), |
|
320 |
+ Level: log.ErrorLevel, |
|
321 |
+ Id: "sdLogDfatal", |
|
322 |
+ Interpretation: ` |
|
323 |
+This is not a known problem, but it is causing Docker to crash, |
|
324 |
+so the OpenShift node will not work on this host until it is resolved.`, |
|
325 |
+ }, |
|
326 |
+ }, |
|
327 |
+ }, |
|
328 |
+ { |
|
329 |
+ Name: "openvswitch", |
|
330 |
+ StartMatch: regexp.MustCompile("Starting Open vSwitch"), |
|
331 |
+ LogMatchers: []logMatcher{}, |
|
332 |
+ }, |
|
333 |
+} |
|
334 |
+ |
|
335 |
+var systemdRelevant = func(env *discovery.Environment) (skip bool, reason string) { |
|
336 |
+ if !env.HasSystemd { |
|
337 |
+ return true, "systemd is not present on this host" |
|
338 |
+ } |
|
339 |
+ return false, "" |
|
340 |
+} |
|
341 |
+ |
|
342 |
+// |
|
343 |
+// -------- The actual diagnostics definitions ----------- |
|
344 |
+// |
|
345 |
+ |
|
346 |
// Diagnostics holds the systemd-area diagnostics, keyed by diagnostic name.
var Diagnostics = map[string]diagnostic.Diagnostic{

	"AnalyzeLogs": {
		Description: "Check for problems in systemd service logs since each service last started",
		Condition:   systemdRelevant,
		Run: func(env *discovery.Environment) {
			// scan journals only for units discovery found enabled or active
			for _, unit := range unitLogSpecs {
				if svc := env.SystemdUnits[unit.Name]; svc.Enabled || svc.Active {
					env.Log.Infom("sdCheckLogs", log.Msg{"tmpl": "Checking journalctl logs for '{{.name}}' service", "name": unit.Name})
					matchLogsSinceLastStart(unit, env)
				}
			}
		},
	},

	"UnitStatus": {
		Description: "Check status for OpenShift-related systemd units",
		Condition:   systemdRelevant,
		Run: func(env *discovery.Environment) {
			u := env.SystemdUnits
			// pairwise dependency checks between related units
			unitRequiresUnit(env.Log, u["openshift-node"], u["iptables"], `
iptables is used by OpenShift nodes for container networking.
Connections to a container will fail without it.`)
			unitRequiresUnit(env.Log, u["openshift-node"], u["docker"], `OpenShift nodes use Docker to run containers.`)
			unitRequiresUnit(env.Log, u["openshift"], u["docker"], `OpenShift nodes use Docker to run containers.`)
			// node's dependency on openvswitch is a special case.
			// We do not need to enable ovs because openshift-node starts it for us.
			if u["openshift-node"].Active && !u["openvswitch"].Active {
				env.Log.Error("sdUnitSDNreqOVS", `
systemd unit openshift-node is running but openvswitch is not.
Normally openshift-node starts openvswitch once initialized.
It is likely that openvswitch has crashed or been stopped.

The software-defined network (SDN) enables networking between
containers on different nodes. Containers will not be able to
connect to each other without the openvswitch service carrying
this traffic.

An administrator can start openvswitch with:

 # systemctl start openvswitch

To ensure it is not repeatedly failing to run, check the status and logs with:

 # systemctl status openvswitch
 # journalctl -ru openvswitch `)
			}
			// Anything that is enabled but not running deserves notice
			for name, unit := range u {
				if unit.Enabled && !unit.Active {
					env.Log.Errorm("sdUnitInactive", log.Msg{"tmpl": `
The {{.unit}} systemd unit is intended to start at boot but is not currently active.
An administrator can start the {{.unit}} unit with:

 # systemctl start {{.unit}}

To ensure it is not failing to run, check the status and logs with:

 # systemctl status {{.unit}}
 # journalctl -ru {{.unit}}`, "unit": name})
				}
			}
		},
	},
}
|
411 |
+ |
|
412 |
+// |
|
413 |
+// -------- Functions used by the diagnostics ----------- |
|
414 |
+// |
|
415 |
+ |
|
416 |
+func unitRequiresUnit(logger *log.Logger, unit types.SystemdUnit, requires types.SystemdUnit, reason string) { |
|
417 |
+ if (unit.Active || unit.Enabled) && !requires.Exists { |
|
418 |
+ logger.Errorm("sdUnitReqLoaded", log.Msg{"tmpl": ` |
|
419 |
+systemd unit {{.unit}} depends on unit {{.required}}, which is not loaded. |
|
420 |
+{{.reason}} |
|
421 |
+An administrator probably needs to install the {{.required}} unit with: |
|
422 |
+ |
|
423 |
+ # yum install {{.required}} |
|
424 |
+ |
|
425 |
+If it is already installed, you may to reload the definition with: |
|
426 |
+ |
|
427 |
+ # systemctl reload {{.required}} |
|
428 |
+ `, "unit": unit.Name, "required": requires.Name, "reason": reason}) |
|
429 |
+ } else if unit.Active && !requires.Active { |
|
430 |
+ logger.Errorm("sdUnitReqActive", log.Msg{"tmpl": ` |
|
431 |
+systemd unit {{.unit}} is running but {{.required}} is not. |
|
432 |
+{{.reason}} |
|
433 |
+An administrator can start the {{.required}} unit with: |
|
434 |
+ |
|
435 |
+ # systemctl start {{.required}} |
|
436 |
+ |
|
437 |
+To ensure it is not failing to run, check the status and logs with: |
|
438 |
+ |
|
439 |
+ # systemctl status {{.required}} |
|
440 |
+ # journalctl -ru {{.required}} |
|
441 |
+ `, "unit": unit.Name, "required": requires.Name, "reason": reason}) |
|
442 |
+ } else if unit.Enabled && !requires.Enabled { |
|
443 |
+ logger.Warnm("sdUnitReqEnabled", log.Msg{"tmpl": ` |
|
444 |
+systemd unit {{.unit}} is enabled to run automatically at boot, but {{.required}} is not. |
|
445 |
+{{.reason}} |
|
446 |
+An administrator can enable the {{.required}} unit with: |
|
447 |
+ |
|
448 |
+ # systemctl enable {{.required}} |
|
449 |
+ `, "unit": unit.Name, "required": requires.Name, "reason": reason}) |
|
450 |
+ } |
|
451 |
+} |
|
452 |
+ |
|
453 |
+func matchLogsSinceLastStart(unit *unitSpec, env *discovery.Environment) { |
|
454 |
+ cmd := exec.Command("journalctl", "-ru", unit.Name, "--output=json") |
|
455 |
+ // JSON comes out of journalctl one line per record |
|
456 |
+ lineReader, reader, err := func(cmd *exec.Cmd) (*bufio.Scanner, io.ReadCloser, error) { |
|
457 |
+ stdout, err := cmd.StdoutPipe() |
|
458 |
+ if err == nil { |
|
459 |
+ lineReader := bufio.NewScanner(stdout) |
|
460 |
+ if err = cmd.Start(); err == nil { |
|
461 |
+ return lineReader, stdout, nil |
|
462 |
+ } |
|
463 |
+ } |
|
464 |
+ return nil, nil, err |
|
465 |
+ }(cmd) |
|
466 |
+ if err != nil { |
|
467 |
+ env.Log.Errorm("sdLogReadErr", log.Msg{"tmpl": ` |
|
468 |
+Diagnostics failed to query journalctl for the '{{.unit}}' unit logs. |
|
469 |
+This should be very unusual, so please report this error: |
|
470 |
+{{.error}}`, "unit": unit.Name, "error": errStr(err)}) |
|
471 |
+ return |
|
472 |
+ } |
|
473 |
+ defer func() { // close out pipe once done reading |
|
474 |
+ reader.Close() |
|
475 |
+ cmd.Wait() |
|
476 |
+ }() |
|
477 |
+ entryTemplate := logEntry{Message: `json:"MESSAGE"`} |
|
478 |
+ matchCopy := append([]logMatcher(nil), unit.LogMatchers...) // make a copy, will remove matchers after they match something |
|
479 |
+ for lineReader.Scan() { // each log entry is a line |
|
480 |
+ if len(matchCopy) == 0 { // if no rules remain to match |
|
481 |
+ break // don't waste time reading more log entries |
|
482 |
+ } |
|
483 |
+ bytes, entry := lineReader.Bytes(), entryTemplate |
|
484 |
+ if err := json.Unmarshal(bytes, &entry); err != nil { |
|
485 |
+ env.Log.Debugm("sdLogBadJSON", log.Msg{"message": string(bytes), "error": errStr(err), |
|
486 |
+ "tmpl": "Couldn't read the JSON for this log message:\n{{.message}}\nGot error {{.error}}"}) |
|
487 |
+ } else { |
|
488 |
+ if unit.StartMatch.MatchString(entry.Message) { |
|
489 |
+ break // saw the log message where the unit started; done looking. |
|
490 |
+ } |
|
491 |
+ for index, match := range matchCopy { // match log message against provided matchers |
|
492 |
+ if strings := match.Regexp.FindStringSubmatch(entry.Message); strings != nil { |
|
493 |
+ // if matches: print interpretation, remove from matchCopy, and go on to next log entry |
|
494 |
+ keep := match.KeepAfterMatch |
|
495 |
+ if match.Interpret != nil { |
|
496 |
+ keep = match.Interpret(env, &entry, strings) |
|
497 |
+ } else { |
|
498 |
+ prelude := fmt.Sprintf("Found '%s' journald log message:\n %s\n", unit.Name, entry.Message) |
|
499 |
+ env.Log.Log(match.Level, match.Id, log.Msg{"text": prelude + match.Interpretation, "unit": unit.Name, "logMsg": entry.Message}) |
|
500 |
+ } |
|
501 |
+ if !keep { // remove matcher once seen |
|
502 |
+ matchCopy = append(matchCopy[:index], matchCopy[index+1:]...) |
|
503 |
+ } |
|
504 |
+ break |
|
505 |
+ } |
|
506 |
+ } |
|
507 |
+ } |
|
508 |
+ } |
|
509 |
+} |
|
510 |
+ |
|
511 |
+func errStr(err error) string { |
|
512 |
+ return fmt.Sprintf("(%T) %[1]v", err) |
|
513 |
+} |
0 | 514 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,16 @@ |
0 |
+package diagnostic |
|
1 |
+ |
|
2 |
+// This needed to be separate from other types to avoid import cycle |
|
3 |
+// diagnostic -> discovery -> types |
|
4 |
+ |
|
5 |
+import ( |
|
6 |
+ "github.com/openshift/origin/pkg/diagnostics/discovery" |
|
7 |
+) |
|
8 |
+ |
|
9 |
// DiagnosticCondition decides whether a diagnostic should be skipped for
// the discovered environment, optionally supplying the reason why.
type DiagnosticCondition func(env *discovery.Environment) (skip bool, reason string)

// Diagnostic is one runnable troubleshooting check.
type Diagnostic struct {
	Description string              // human-readable summary of what the check does
	Condition   DiagnosticCondition // optional; nil means always run
	Run         func(env *discovery.Environment)
}
0 | 9 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,38 @@ |
0 |
+package types |
|
1 |
+ |
|
2 |
+import "fmt" |
|
3 |
+ |
|
4 |
// Version is a simple X.Y.Z version triple.
type Version struct {
	X, Y, Z int
}

// Eq reports whether the two versions are identical.
func (a Version) Eq(b Version) bool {
	return a == b
}

// Gt reports whether version a is strictly greater than b,
// comparing X, then Y, then Z.
func (a Version) Gt(b Version) bool {
	switch {
	case a.X != b.X:
		return a.X > b.X
	case a.Y != b.Y:
		return a.Y > b.Y
	default:
		return a.Z > b.Z
	}
}

// GoString renders the version as "x.y.z".
// NOTE(review): fmt consults GoString only for %#v; if "x.y.z" is also
// wanted from plain %v, a String() method would be the conventional home.
func (v Version) GoString() string {
	return fmt.Sprintf("%d.%d.%d", v.X, v.Y, v.Z)
}

// NonZero reports whether the version differs from 0.0.0.
func (v Version) NonZero() bool {
	return v != Version{}
}