17 | 16 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,194 @@ |
0 |
+package cmd |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
5 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
6 |
+ "github.com/openshift/origin/pkg/cmd/templates" |
|
7 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
8 |
+ "github.com/openshift/origin/pkg/diagnostics/run" |
|
9 |
+ "github.com/spf13/cobra" |
|
10 |
+ "io" |
|
11 |
+) |
|
12 |
+ |
|
13 |
+const longAllDescription = ` |
|
14 |
+OpenShift Diagnostics |
|
15 |
+ |
|
16 |
+This command helps you understand and troubleshoot OpenShift. It is |
|
17 |
+intended to be run from the same context as an OpenShift client or running |
|
18 |
+master / node in order to troubleshoot from the perspective of each. |
|
19 |
+ |
|
20 |
+ $ %[1]s |
|
21 |
+ |
|
22 |
+If run without flags or subcommands, it will check for config files for |
|
23 |
+client, master, and node, and if found, use them for troubleshooting |
|
24 |
+those components. If master/node config files are not found, the tool |
|
25 |
+assumes they are not present and does diagnostics only as a client. |
|
26 |
+ |
|
27 |
+You may also specify config files explicitly with flags below, in which |
|
28 |
+case you will receive an error if they are invalid or not found. |
|
29 |
+ |
|
30 |
+ $ %[1]s --master-config=/etc/openshift/master/master-config.yaml |
|
31 |
+ |
|
32 |
+Subcommands may be used to scope the troubleshooting to a particular |
|
33 |
+component and are not limited to using config files; you can and should |
|
34 |
+use the same flags that are actually set on the command line for that |
|
35 |
+component to configure the diagnostic. |
|
36 |
+ |
|
37 |
+ $ %[1]s node --hostname='node.example.com' --kubeconfig=... |
|
38 |
+ |
|
39 |
+NOTE: This is an alpha version of diagnostics and will change significantly. |
|
40 |
+NOTE: Global flags (from the 'options' subcommand) are ignored here but |
|
41 |
+can be used with subcommands. |
|
42 |
+` |
|
43 |
+ |
|
44 |
+func NewCommandDiagnostics(name string, fullName string, out io.Writer) *cobra.Command { |
|
45 |
+ opts := options.NewAllDiagnosticsOptions(out) |
|
46 |
+ cmd := &cobra.Command{ |
|
47 |
+ Use: name, |
|
48 |
+ Short: "This utility helps you understand and troubleshoot OpenShift v3.", |
|
49 |
+ Long: fmt.Sprintf(longAllDescription, fullName), |
|
50 |
+ Run: func(c *cobra.Command, args []string) { |
|
51 |
+ opts.GlobalFlags = c.PersistentFlags() |
|
52 |
+ run.Diagnose(opts) |
|
53 |
+ }, |
|
54 |
+ } |
|
55 |
+ cmd.SetOutput(out) // for output re: usage / help |
|
56 |
+ opts.BindFlags(cmd.Flags(), options.NewAllDiagnosticsFlagInfos()) |
|
57 |
+ // Although we reuse DiagOptions across all commands, we do not want the flags buried |
|
58 |
+ // in the "global" flags, so we add them locally at each command. |
|
59 |
+ opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos()) |
|
60 |
+ |
|
61 |
+ /* |
|
62 |
+ This command needs the client factory built in the "client" subcommand. |
|
63 |
+ Generating the factory adds flags to the "client" cmd, and we do not want |
|
64 |
+ to add those flags to this command (the only client option here is a config |
|
65 |
+ file). So the factory object from client cmd is reused for this command. |
|
66 |
+ */ |
|
67 |
+ clientCmd, factory := NewClientCommand("client", name+" client", out) |
|
68 |
+ opts.ClientDiagOptions.Factory = factory |
|
69 |
+ |
|
70 |
+ cmd.AddCommand(clientCmd) |
|
71 |
+ cmd.AddCommand(NewMasterCommand("master", name+" master", out)) |
|
72 |
+ cmd.AddCommand(NewNodeCommand("node", name+" node", out)) |
|
73 |
+ cmd.AddCommand(NewOptionsCommand()) |
|
74 |
+ |
|
75 |
+ return cmd |
|
76 |
+} |
|
77 |
+ |
|
78 |
+const longClientDescription = ` |
|
79 |
+OpenShift Diagnostics |
|
80 |
+ |
|
81 |
+This command helps you understand and troubleshoot OpenShift as a user. It is |
|
82 |
+intended to be run from the same context as an OpenShift client |
|
83 |
+("openshift cli" or "osc") and with the same configuration options. |
|
84 |
+ |
|
85 |
+ $ %s |
|
86 |
+` |
|
87 |
+ |
|
88 |
+func NewClientCommand(name string, fullName string, out io.Writer) (*cobra.Command, *osclientcmd.Factory) { |
|
89 |
+ opts := options.NewClientDiagnosticsOptions(out, nil) |
|
90 |
+ cmd := &cobra.Command{ |
|
91 |
+ Use: name, |
|
92 |
+ Short: "Troubleshoot using the OpenShift v3 client.", |
|
93 |
+ Long: fmt.Sprintf(longClientDescription, fullName), |
|
94 |
+ Run: func(c *cobra.Command, args []string) { |
|
95 |
+ run.Diagnose(&options.AllDiagnosticsOptions{ |
|
96 |
+ ClientDiagOptions: opts, |
|
97 |
+ DiagOptions: opts.DiagOptions, |
|
98 |
+ GlobalFlags: c.PersistentFlags(), |
|
99 |
+ }) |
|
100 |
+ }, |
|
101 |
+ } |
|
102 |
+ cmd.SetOutput(out) // for output re: usage / help |
|
103 |
+ opts.MustCheck = true |
|
104 |
+ opts.Factory = osclientcmd.New(cmd.PersistentFlags()) // side effect: add standard persistent flags for openshift client |
|
105 |
+ opts.BindFlags(cmd.Flags(), options.NewClientDiagnosticsFlagInfos()) |
|
106 |
+ opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos()) |
|
107 |
+ |
|
108 |
+ cmd.AddCommand(NewOptionsCommand()) |
|
109 |
+ return cmd, opts.Factory |
|
110 |
+} |
|
111 |
+ |
|
112 |
+const longMasterDescription = ` |
|
113 |
+OpenShift Diagnostics |
|
114 |
+ |
|
115 |
+This command helps you understand and troubleshoot a running OpenShift |
|
116 |
+master. It is intended to be run from the same context as the master |
|
117 |
+(where "openshift start" or "openshift start master" is run, possibly from |
|
118 |
+systemd or inside a container) and with the same configuration options. |
|
119 |
+ |
|
120 |
+ $ %s |
|
121 |
+` |
|
122 |
+ |
|
123 |
+func NewMasterCommand(name string, fullName string, out io.Writer) *cobra.Command { |
|
124 |
+ opts := options.NewMasterDiagnosticsOptions(out, nil) |
|
125 |
+ cmd := &cobra.Command{ |
|
126 |
+ Use: name, |
|
127 |
+ Short: "Troubleshoot an OpenShift v3 master.", |
|
128 |
+ Long: fmt.Sprintf(longMasterDescription, fullName), |
|
129 |
+ Run: func(c *cobra.Command, args []string) { |
|
130 |
+ run.Diagnose(&options.AllDiagnosticsOptions{ |
|
131 |
+ MasterDiagOptions: opts, |
|
132 |
+ DiagOptions: opts.DiagOptions, |
|
133 |
+ GlobalFlags: c.PersistentFlags(), |
|
134 |
+ }) |
|
135 |
+ }, |
|
136 |
+ } |
|
137 |
+ cmd.SetOutput(out) // for output re: usage / help |
|
138 |
+ opts.MustCheck = true |
|
139 |
+ opts.MasterStartOptions = &start.MasterOptions{MasterArgs: start.MasterArgsAndFlags(cmd.Flags())} |
|
140 |
+ opts.BindFlags(cmd.Flags(), options.NewMasterDiagnosticsFlagInfos()) |
|
141 |
+ opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos()) |
|
142 |
+ |
|
143 |
+ cmd.AddCommand(NewOptionsCommand()) |
|
144 |
+ return cmd |
|
145 |
+} |
|
146 |
+ |
|
147 |
+const longNodeDescription = ` |
|
148 |
+OpenShift Diagnostics |
|
149 |
+ |
|
150 |
+This command helps you understand and troubleshoot a running OpenShift |
|
151 |
+node. It is intended to be run from the same context as the node |
|
152 |
+(where "openshift start" or "openshift start node" is run, possibly from |
|
153 |
+systemd or inside a container) and with the same configuration options. |
|
154 |
+ |
|
155 |
+ $ %s |
|
156 |
+` |
|
157 |
+ |
|
158 |
+func NewNodeCommand(name string, fullName string, out io.Writer) *cobra.Command { |
|
159 |
+ opts := options.NewNodeDiagnosticsOptions(out, nil) |
|
160 |
+ cmd := &cobra.Command{ |
|
161 |
+ Use: name, |
|
162 |
+ Short: "Troubleshoot an OpenShift v3 node.", |
|
163 |
+ Long: fmt.Sprintf(longNodeDescription, fullName), |
|
164 |
+ Run: func(c *cobra.Command, args []string) { |
|
165 |
+ run.Diagnose(&options.AllDiagnosticsOptions{ |
|
166 |
+ NodeDiagOptions: opts, |
|
167 |
+ DiagOptions: opts.DiagOptions, |
|
168 |
+ GlobalFlags: c.PersistentFlags(), |
|
169 |
+ }) |
|
170 |
+ }, |
|
171 |
+ } |
|
172 |
+ cmd.SetOutput(out) // for output re: usage / help |
|
173 |
+ opts.MustCheck = true |
|
174 |
+ opts.NodeStartOptions = &start.NodeOptions{NodeArgs: start.NodeArgsAndFlags(cmd.Flags())} |
|
175 |
+ opts.BindFlags(cmd.Flags(), options.NewNodeDiagnosticsFlagInfos()) |
|
176 |
+ opts.DiagOptions.BindFlags(cmd.Flags(), options.NewDiagnosticsFlagInfos()) |
|
177 |
+ |
|
178 |
+ cmd.AddCommand(NewOptionsCommand()) |
|
179 |
+ return cmd |
|
180 |
+} |
|
181 |
+ |
|
182 |
+func NewOptionsCommand() *cobra.Command { |
|
183 |
+ cmd := &cobra.Command{ |
|
184 |
+ Use: "options", |
|
185 |
+ Run: func(cmd *cobra.Command, args []string) { |
|
186 |
+ cmd.Usage() |
|
187 |
+ }, |
|
188 |
+ } |
|
189 |
+ |
|
190 |
+ templates.UseOptionsTemplates(cmd) |
|
191 |
+ |
|
192 |
+ return cmd |
|
193 |
+} |
0 | 194 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,53 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/spf13/pflag" |
|
4 |
+ "io" |
|
5 |
+) |
|
6 |
+ |
|
7 |
// AllDiagnosticsOptions holds the user options for the openshift-diagnostics
// main command, aggregating the common options plus the per-component
// (client/master/node) option sets and their config-file paths.
type AllDiagnosticsOptions struct {
	DiagOptions       *DiagnosticsOptions       // options shared by all diagnostics commands
	ClientDiagOptions *ClientDiagnosticsOptions // client-specific options
	MasterDiagOptions *MasterDiagnosticsOptions // master-specific options
	NodeDiagOptions   *NodeDiagnosticsOptions   // node-specific options
	ClientConfigPath  string                    // explicit client config file path, if given
	MasterConfigPath  string                    // explicit master config file path, if given
	NodeConfigPath    string                    // explicit node config file path, if given

	// GlobalFlags is retained because there are cases where discovery has to
	// look up flags created indirectly.
	GlobalFlags *pflag.FlagSet
}
|
20 |
+ |
|
21 |
// AllDiagnosticsFlagInfos are the definitions used to bind
// AllDiagnosticsOptions to actual flags on a command.
type AllDiagnosticsFlagInfos struct {
	ClientConfigPath FlagInfo // flag for AllDiagnosticsOptions.ClientConfigPath
	MasterConfigPath FlagInfo // flag for AllDiagnosticsOptions.MasterConfigPath
	NodeConfigPath   FlagInfo // flag for AllDiagnosticsOptions.NodeConfigPath
}
|
27 |
+ |
|
28 |
+func NewAllDiagnosticsOptions(out io.Writer) *AllDiagnosticsOptions { |
|
29 |
+ common := NewDiagnosticsOptions(out) |
|
30 |
+ |
|
31 |
+ return &AllDiagnosticsOptions{ |
|
32 |
+ DiagOptions: common, |
|
33 |
+ ClientDiagOptions: NewClientDiagnosticsOptions(nil, common), |
|
34 |
+ MasterDiagOptions: NewMasterDiagnosticsOptions(nil, common), |
|
35 |
+ NodeDiagOptions: NewNodeDiagnosticsOptions(nil, common), |
|
36 |
+ } |
|
37 |
+} |
|
38 |
+ |
|
39 |
+// default overrideable flag specifications to be bound to options. |
|
40 |
+func NewAllDiagnosticsFlagInfos() *AllDiagnosticsFlagInfos { |
|
41 |
+ return &AllDiagnosticsFlagInfos{ |
|
42 |
+ ClientConfigPath: FlagInfo{FlagAllClientConfigName, "", "", "Path to the client config file."}, |
|
43 |
+ MasterConfigPath: FlagInfo{FlagAllMasterConfigName, "", "", "Path to the master config file."}, |
|
44 |
+ NodeConfigPath: FlagInfo{FlagAllNodeConfigName, "", "", "Path to the node config file."}, |
|
45 |
+ } |
|
46 |
+} |
|
47 |
+ |
|
48 |
+func (o *AllDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *AllDiagnosticsFlagInfos) { |
|
49 |
+ flagInfos.ClientConfigPath.BindStringFlag(cmdFlags, &o.ClientConfigPath) |
|
50 |
+ flagInfos.MasterConfigPath.BindStringFlag(cmdFlags, &o.MasterConfigPath) |
|
51 |
+ flagInfos.NodeConfigPath.BindStringFlag(cmdFlags, &o.NodeConfigPath) |
|
52 |
+} |
0 | 53 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,46 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
4 |
+ "github.com/spf13/pflag" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// ClientDiagnosticsOptions holds the user options for the
// openshift-diagnostics client command.
type ClientDiagnosticsOptions struct {
	DiagOptions *DiagnosticsOptions  // options shared by all diagnostics commands
	Factory     *osclientcmd.Factory // client factory; set by the command that builds it
	MustCheck   bool                 // set for "diagnostics client" which requires diagnosing the client even if there is no config file
	// Turns out we don't need to add any flags... YET
}
|
15 |
+ |
|
16 |
// ClientDiagnosticsFlagInfos are the definitions used to bind
// ClientDiagnosticsOptions to actual flags on a command.
// The client command defines no flags of its own yet.
type ClientDiagnosticsFlagInfos struct {
	// don't need yet...
	//Something FlagInfo
}
|
21 |
+ |
|
22 |
+// supply output writer or pre-created DiagnosticsOptions |
|
23 |
+func NewClientDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *ClientDiagnosticsOptions { |
|
24 |
+ if opts != nil { |
|
25 |
+ return &ClientDiagnosticsOptions{ |
|
26 |
+ DiagOptions: opts, |
|
27 |
+ } |
|
28 |
+ } else if out != nil { |
|
29 |
+ return &ClientDiagnosticsOptions{ |
|
30 |
+ DiagOptions: NewDiagnosticsOptions(out), |
|
31 |
+ } |
|
32 |
+ } |
|
33 |
+ return nil |
|
34 |
+} |
|
35 |
+ |
|
36 |
+// default overrideable flag specifications to be bound to options. |
|
37 |
+func NewClientDiagnosticsFlagInfos() *ClientDiagnosticsFlagInfos { |
|
38 |
+ return &ClientDiagnosticsFlagInfos{ |
|
39 |
+ //NodeConfigPath: FlagInfo{"node-config", "", "", "Path to the node config file."}, |
|
40 |
+ } |
|
41 |
+} |
|
42 |
+ |
|
43 |
// BindFlags is a placeholder kept for symmetry with the master/node option
// types; the client command has no flags of its own to bind yet.
func (o *ClientDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *ClientDiagnosticsFlagInfos) {
	//flagInfos.Something.BindStringFlag(cmdFlags, &o.Something)
}
0 | 46 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,46 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
|
4 |
+ cmdutil "github.com/openshift/origin/pkg/cmd/util" |
|
5 |
+ "github.com/spf13/pflag" |
|
6 |
+ "io" |
|
7 |
+) |
|
8 |
+ |
|
9 |
// DiagnosticsOptions are the common options that all of the diagnostics
// commands bind.
type DiagnosticsOptions struct {
	Diagnostics *util.StringList // named diagnostics to run
	DiagLevel   int              // show output of this priority or higher
	DiagFormat  string           // format of output - text/json/yaml

	Output cmdutil.Output // used for discovery and diagnostic output
}
|
17 |
+ |
|
18 |
+func NewDiagnosticsOptions(out io.Writer) *DiagnosticsOptions { |
|
19 |
+ return &DiagnosticsOptions{ |
|
20 |
+ Diagnostics: &util.StringList{}, // have to instantiate in order to bind flag |
|
21 |
+ Output: cmdutil.Output{out}, |
|
22 |
+ } |
|
23 |
+} |
|
24 |
+ |
|
25 |
// DiagnosticsFlagInfos are the definitions used to bind DiagnosticsOptions
// to actual flags on a command.
type DiagnosticsFlagInfos struct {
	Diagnostics FlagInfo // flag selecting which named diagnostics to run
	DiagLevel   FlagInfo // flag controlling output priority threshold
	DiagFormat  FlagInfo // flag selecting the output format
}
|
31 |
+ |
|
32 |
+// default overrideable flag specifications to be bound to options. |
|
33 |
+func NewDiagnosticsFlagInfos() *DiagnosticsFlagInfos { |
|
34 |
+ return &DiagnosticsFlagInfos{ |
|
35 |
+ Diagnostics: FlagInfo{FlagDiagnosticsName, "d", "", `comma-separated list of diagnostic names to run, e.g. "systemd.AnalyzeLogs"`}, |
|
36 |
+ DiagLevel: FlagInfo{FlagLevelName, "l", "3", "Level of diagnostic output: 0: Error, 1: Warn, 2: Notice, 3: Info, 4: Debug"}, |
|
37 |
+ DiagFormat: FlagInfo{FlagFormatName, "o", "text", "Output format: text|json|yaml"}, |
|
38 |
+ } |
|
39 |
+} |
|
40 |
+ |
|
41 |
+func (o *DiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *DiagnosticsFlagInfos) { |
|
42 |
+ flagInfos.Diagnostics.BindListFlag(cmdFlags, o.Diagnostics) |
|
43 |
+ flagInfos.DiagLevel.BindIntFlag(cmdFlags, &o.DiagLevel) |
|
44 |
+ flagInfos.DiagFormat.BindStringFlag(cmdFlags, &o.DiagFormat) |
|
45 |
+} |
0 | 46 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,57 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ kclientcmd "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd" |
|
4 |
+ kutil "github.com/GoogleCloudPlatform/kubernetes/pkg/util" |
|
5 |
+ "github.com/spf13/pflag" |
|
6 |
+ "strconv" |
|
7 |
+) |
|
8 |
+ |
|
9 |
// FlagInfo is a local alias of the kubernetes clientcmd FlagInfo, reused so
// that flag-binding helper methods can be defined on it here.
type FlagInfo kclientcmd.FlagInfo

// FlagInfos serve as a customizable intermediary between the command flags and
// the options object they feed into. This enables reuse of the flags and options
// with tweaked definitions in different contexts if necessary.
|
14 |
+ |
|
15 |
+func (i FlagInfo) BindStringFlag(flags *pflag.FlagSet, target *string) { |
|
16 |
+ // assume flags with no longname are not desired |
|
17 |
+ if len(i.LongName) > 0 { |
|
18 |
+ flags.StringVarP(target, i.LongName, i.ShortName, i.Default, i.Description) |
|
19 |
+ } |
|
20 |
+} |
|
21 |
+ |
|
22 |
+func (i FlagInfo) BindIntFlag(flags *pflag.FlagSet, target *int) { |
|
23 |
+ // assume flags with no longname are not desired |
|
24 |
+ if len(i.LongName) > 0 { |
|
25 |
+ // try to parse Default as an int. If it fails, assume 0 |
|
26 |
+ intVal, _ := strconv.ParseInt(i.Default, 10, 0) |
|
27 |
+ flags.IntVarP(target, i.LongName, i.ShortName, int(intVal), i.Description) |
|
28 |
+ } |
|
29 |
+} |
|
30 |
+ |
|
31 |
+func (i FlagInfo) BindBoolFlag(flags *pflag.FlagSet, target *bool) { |
|
32 |
+ // assume flags with no longname are not desired |
|
33 |
+ if len(i.LongName) > 0 { |
|
34 |
+ // try to parse Default as a bool. If it fails, assume false |
|
35 |
+ boolVal, _ := strconv.ParseBool(i.Default) |
|
36 |
+ flags.BoolVarP(target, i.LongName, i.ShortName, boolVal, i.Description) |
|
37 |
+ } |
|
38 |
+} |
|
39 |
+ |
|
40 |
+func (i FlagInfo) BindListFlag(flags *pflag.FlagSet, target *kutil.StringList) { |
|
41 |
+ // assume flags with no longname are not desired |
|
42 |
+ if len(i.LongName) > 0 { |
|
43 |
+ flags.VarP(target, i.LongName, i.ShortName, i.Description) |
|
44 |
+ } |
|
45 |
+} |
|
46 |
+ |
|
47 |
// Long names for the flags used by the diagnostics commands.
const (
	FlagAllClientConfigName = "client-config"
	FlagAllMasterConfigName = "master-config"
	FlagAllNodeConfigName   = "node-config"
	FlagDiagnosticsName     = "diagnostics"
	FlagLevelName           = "diaglevel"
	FlagFormatName          = "output"
	// The master and node subcommands each use plain "config" for their own
	// config-file flag (they never share a flag set, so the duplicate name
	// does not collide).
	FlagMasterConfigName = "config"
	FlagNodeConfigName   = "config"
)
0 | 57 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,45 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
4 |
+ "github.com/spf13/pflag" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// MasterDiagnosticsOptions holds the user options for the
// openshift-diagnostics master command.
type MasterDiagnosticsOptions struct {
	DiagOptions *DiagnosticsOptions // options shared by all diagnostics commands
	MustCheck   bool                // set for "diagnostics master" which requires diagnosing master even if there is no config file
	// MasterStartOptions reuses the master options from "openshift start master";
	// it must be populated by the command before BindFlags is called.
	MasterStartOptions *start.MasterOptions
}
|
15 |
+ |
|
16 |
// MasterDiagnosticsFlagInfos are the definitions used to bind
// MasterDiagnosticsOptions to actual flags on a command.
type MasterDiagnosticsFlagInfos struct {
	ConfigFile FlagInfo // flag for the master config-file path
}
|
20 |
+ |
|
21 |
+// supply output writer or pre-created DiagnosticsOptions |
|
22 |
+func NewMasterDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *MasterDiagnosticsOptions { |
|
23 |
+ if opts != nil { |
|
24 |
+ return &MasterDiagnosticsOptions{ |
|
25 |
+ DiagOptions: opts, |
|
26 |
+ } |
|
27 |
+ } else if out != nil { |
|
28 |
+ return &MasterDiagnosticsOptions{ |
|
29 |
+ DiagOptions: NewDiagnosticsOptions(out), |
|
30 |
+ } |
|
31 |
+ } |
|
32 |
+ return nil |
|
33 |
+} |
|
34 |
+ |
|
35 |
+// default overrideable flag specifications to be bound to options. |
|
36 |
+func NewMasterDiagnosticsFlagInfos() *MasterDiagnosticsFlagInfos { |
|
37 |
+ return &MasterDiagnosticsFlagInfos{ |
|
38 |
+ ConfigFile: FlagInfo{FlagMasterConfigName, "", "", "Location of the master configuration file to run from. When running from a configuration file, all other command-line arguments are ignored."}, |
|
39 |
+ } |
|
40 |
+} |
|
41 |
+ |
|
42 |
// BindFlags binds the master config-file flag to cmdFlags.
// NOTE(review): o.MasterStartOptions is dereferenced here and is not set by
// NewMasterDiagnosticsOptions; callers (e.g. NewMasterCommand) must assign
// it first or this will panic on a nil pointer.
func (o *MasterDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *MasterDiagnosticsFlagInfos) {
	flagInfos.ConfigFile.BindStringFlag(cmdFlags, &o.MasterStartOptions.ConfigFile)
}
0 | 45 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,45 @@ |
0 |
+package options |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
4 |
+ "github.com/spf13/pflag" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// NodeDiagnosticsOptions holds the user options for the
// openshift-diagnostics node command.
type NodeDiagnosticsOptions struct {
	DiagOptions *DiagnosticsOptions // options shared by all diagnostics commands
	MustCheck   bool                // set for "diagnostics node" which requires diagnosing node even if there is no config file
	// NodeStartOptions reuses the node options from "openshift start node";
	// it must be populated by the command before BindFlags is called.
	NodeStartOptions *start.NodeOptions
}
|
15 |
+ |
|
16 |
// NodeDiagnosticsFlagInfos are the definitions used to bind
// NodeDiagnosticsOptions to actual flags on a command.
type NodeDiagnosticsFlagInfos struct {
	ConfigFile FlagInfo // flag for the node config-file path
}
|
20 |
+ |
|
21 |
+// supply output writer or pre-created DiagnosticsOptions |
|
22 |
+func NewNodeDiagnosticsOptions(out io.Writer, opts *DiagnosticsOptions) *NodeDiagnosticsOptions { |
|
23 |
+ if opts != nil { |
|
24 |
+ return &NodeDiagnosticsOptions{ |
|
25 |
+ DiagOptions: opts, |
|
26 |
+ } |
|
27 |
+ } else if out != nil { |
|
28 |
+ return &NodeDiagnosticsOptions{ |
|
29 |
+ DiagOptions: NewDiagnosticsOptions(out), |
|
30 |
+ } |
|
31 |
+ } |
|
32 |
+ return nil |
|
33 |
+} |
|
34 |
+ |
|
35 |
+// default overrideable flag specifications to be bound to options. |
|
36 |
+func NewNodeDiagnosticsFlagInfos() *NodeDiagnosticsFlagInfos { |
|
37 |
+ return &NodeDiagnosticsFlagInfos{ |
|
38 |
+ ConfigFile: FlagInfo{FlagNodeConfigName, "", "", "Location of the node configuration file to run from. When running from a configuration file, all other command-line arguments are ignored."}, |
|
39 |
+ } |
|
40 |
+} |
|
41 |
+ |
|
42 |
+func (o *NodeDiagnosticsOptions) BindFlags(cmdFlags *pflag.FlagSet, flagInfos *NodeDiagnosticsFlagInfos) { |
|
43 |
+ flagInfos.ConfigFile.BindStringFlag(cmdFlags, &o.NodeStartOptions.ConfigFile) |
|
44 |
+} |
... | ... |
@@ -12,6 +12,7 @@ import ( |
12 | 12 |
"github.com/openshift/origin/pkg/cmd/cli" |
13 | 13 |
"github.com/openshift/origin/pkg/cmd/cli/cmd" |
14 | 14 |
"github.com/openshift/origin/pkg/cmd/experimental/buildchain" |
15 |
+ diagnostics "github.com/openshift/origin/pkg/cmd/experimental/diagnostics" |
|
15 | 16 |
exipfailover "github.com/openshift/origin/pkg/cmd/experimental/ipfailover" |
16 | 17 |
"github.com/openshift/origin/pkg/cmd/experimental/tokens" |
17 | 18 |
"github.com/openshift/origin/pkg/cmd/flagtypes" |
... | ... |
@@ -52,6 +53,8 @@ func CommandFor(basename string) *cobra.Command { |
52 | 52 |
cmd = irouter.NewCommandRouter(basename) |
53 | 53 |
case "openshift-deploy": |
54 | 54 |
cmd = deployer.NewCommandDeployer(basename) |
55 |
+ case "openshift-diagnostics": |
|
56 |
+ cmd = diagnostics.NewCommandDiagnostics(basename, basename, os.Stdout) |
|
55 | 57 |
case "openshift-sti-build": |
56 | 58 |
cmd = builder.NewCommandSTIBuilder(basename) |
57 | 59 |
case "openshift-docker-build": |
... | ... |
@@ -155,6 +158,7 @@ func newExperimentalCommand(name, fullName string) *cobra.Command { |
155 | 155 |
experimental.AddCommand(tokens.NewCmdTokens(tokens.TokenRecommendedCommandName, fullName+" "+tokens.TokenRecommendedCommandName, f, out)) |
156 | 156 |
experimental.AddCommand(exipfailover.NewCmdIPFailoverConfig(f, fullName, "ipfailover", out)) |
157 | 157 |
experimental.AddCommand(buildchain.NewCmdBuildChain(name, fullName+" "+buildchain.BuildChainRecommendedCommandName, f, out)) |
158 |
+ experimental.AddCommand(diagnostics.NewCommandDiagnostics("diagnostics", fullName+" diagnostics", out)) |
|
158 | 159 |
experimental.AddCommand(cmd.NewCmdOptions(out)) |
159 | 160 |
return experimental |
160 | 161 |
} |
161 | 162 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,84 @@ |
0 |
+OpenShift v3 Diagnostics |
|
1 |
+======================== |
|
2 |
+ |
|
3 |
+This is a tool to help administrators and users resolve common problems |
|
4 |
+that occur with OpenShift v3 deployments. It is currently (May 2015) |
|
5 |
+under continuous development as the OpenShift Origin project progresses. |
|
6 |
+ |
|
7 |
+The goals of the diagnostics tool are summarized in this [Trello |
|
8 |
+card](https://trello.com/c/LdUogKuN). Diagnostics are included as an |
|
9 |
+`openshift` binary sub-command that analyzes OpenShift as it finds it, |
|
10 |
+whether from the perspective of an OpenShift client or on an OpenShift |
|
11 |
+host. |
|
12 |
+ |
|
13 |
+Expected environment |
|
14 |
+==================== |
|
15 |
+ |
|
16 |
+OpenShift can be deployed in many ways: built from source, included |
|
17 |
+in a VM image, in a Docker image, or as enterprise RPMs. Each of these |
|
18 |
+would imply different configuration and environment. In order to keep |
|
19 |
+assumptions about environment to a minimum, the diagnostics have been |
|
20 |
+added to the `openshift` binary itself so that wherever there is an |
|
21 |
+OpenShift server or client, the diagnostics can run in the exact same |
|
22 |
+environment. |
|
23 |
+ |
|
24 |
+`openshift ex diagnostics` subcommands for master, node, and client |
|
25 |
+provide flags to mimic the configurations for those respective components, |
|
26 |
+so that running diagnostics against a component should be as simple as |
|
27 |
+supplying the same flags that would invoke the component. So, |
|
28 |
+for example, if a master is started with: |
|
29 |
+ |
|
30 |
+ openshift start master --public-hostname=... |
|
31 |
+ |
|
32 |
+Then diagnostics against that master would simply be run as: |
|
33 |
+ |
|
34 |
+ openshift ex diagnostics master --public-hostname=... |
|
35 |
+ |
|
36 |
+In this way it should be possible to invoke diagnostics against any |
|
37 |
+given environment. |
|
38 |
+ |
|
39 |
+Host environment |
|
40 |
+================ |
|
41 |
+ |
|
42 |
+However, master/node diagnostics will be most useful in a specific |
|
43 |
+target environment, which is a deployment using Enterprise RPMs and |
|
44 |
+ansible deployment logic. This provides two major benefits: |
|
45 |
+ |
|
46 |
+* master/node configuration is based on a configuration file in a standard location |
|
47 |
+* all components log to journald |
|
48 |
+ |
|
49 |
+Having configuration files in standard locations means you will generally |
|
50 |
+not even need to specify where to find them. Running: |
|
51 |
+ |
|
52 |
+ openshift ex diagnostics |
|
53 |
+ |
|
54 |
+by itself will look for master and node configs (in addition to client |
|
55 |
+config file) in the standard locations and use them if found; so this |
|
56 |
+should make the Enterprise use case as simple as possible. It's also |
|
57 |
+very easy to use configuration files when they are not in the expected |
|
58 |
+Enterprise locations: |
|
59 |
+ |
|
60 |
+ openshift ex diagnostics --master-config=... --node-config=... |
|
61 |
+ |
|
62 |
+Having logs in journald is necessary for the current log analysis |
|
63 |
+logic. Other usage may have logs going into files, output to stdout, |
|
64 |
+combined node/master... it may not be too hard to extend analysis to |
|
65 |
+other log sources but the priority has been to look at journald logs |
|
66 |
+as created by components in Enterprise deployments (including docker, |
|
67 |
+openvswitch, etc.). |
|
68 |
+ |
|
69 |
+Client environment |
|
70 |
+================== |
|
71 |
+ |
|
72 |
+The user may only have access as an ordinary user, as a cluster-admin |
|
73 |
+user, or may have admin on a host where OpenShift master or node services |
|
74 |
+are operating. The diagnostics will attempt to use as much access as |
|
75 |
+the user has available. |
|
76 |
+ |
|
77 |
+A client with ordinary access should be able to diagnose its connection |
|
78 |
+to the master and look for problems in builds and deployments. |
|
79 |
+ |
|
80 |
+A client with cluster-admin access should be able to diagnose the same |
|
81 |
+things for every project in the deployment, as well as infrastructure |
|
82 |
+status. |
|
83 |
+ |
0 | 84 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,293 @@ |
0 |
+package client |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
|
5 |
+ kerrs "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" |
|
6 |
+ client "github.com/GoogleCloudPlatform/kubernetes/pkg/client" |
|
7 |
+ clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
8 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" |
|
9 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" |
|
10 |
+ osclient "github.com/openshift/origin/pkg/client" |
|
11 |
+ "github.com/openshift/origin/pkg/diagnostics/discovery" |
|
12 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
13 |
+ "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
14 |
+ osapi "github.com/openshift/origin/pkg/image/api" |
|
15 |
+ "reflect" |
|
16 |
+ "strings" |
|
17 |
+) |
|
18 |
+ |
|
19 |
+var Diagnostics = map[string]diagnostic.Diagnostic{ |
|
20 |
+ "NodeDefinitions": { |
|
21 |
+ Description: "Check node records on master", |
|
22 |
+ Condition: func(env *discovery.Environment) (skip bool, reason string) { |
|
23 |
+ if env.ClusterAdminFactory == nil { |
|
24 |
+ return true, "Client does not have cluster-admin access and cannot see node records" |
|
25 |
+ } |
|
26 |
+ return false, "" |
|
27 |
+ }, |
|
28 |
+ Run: func(env *discovery.Environment) { |
|
29 |
+ var err error |
|
30 |
+ var nodes *kapi.NodeList |
|
31 |
+ if _, kclient, err := env.ClusterAdminFactory.Clients(); err == nil { |
|
32 |
+ nodes, err = kclient.Nodes().List(labels.LabelSelector{}, fields.Everything()) |
|
33 |
+ } |
|
34 |
+ if err != nil { |
|
35 |
+ env.Log.Errorf("clGetNodesFailed", ` |
|
36 |
+Client error while retrieving node records. Client retrieved records |
|
37 |
+during discovery, so this is likely to be a transient error. Try running |
|
38 |
+diagnostics again. If this message persists, there may be a permissions |
|
39 |
+problem with getting node records. The error was: |
|
40 |
+ |
|
41 |
+(%T) %[1]v`, err) |
|
42 |
+ return |
|
43 |
+ } |
|
44 |
+ for _, node := range nodes.Items { |
|
45 |
+ //pretty.Println("Node record:", node) |
|
46 |
+ var ready *kapi.NodeCondition |
|
47 |
+ for i, condition := range node.Status.Conditions { |
|
48 |
+ switch condition.Type { |
|
49 |
+ // currently only one... used to be more, may be again |
|
50 |
+ case kapi.NodeReady: |
|
51 |
+ ready = &node.Status.Conditions[i] |
|
52 |
+ } |
|
53 |
+ } |
|
54 |
+ //pretty.Println("Node conditions for "+node.Name, ready, schedulable) |
|
55 |
+ if ready == nil || ready.Status != kapi.ConditionTrue { |
|
56 |
+ msg := log.Msg{ |
|
57 |
+ "node": node.Name, |
|
58 |
+ "tmpl": ` |
|
59 |
+Node {{.node}} is defined but is not marked as ready. |
|
60 |
+Ready status is {{.status}} because "{{.reason}}" |
|
61 |
+If the node is not intentionally disabled, check that the master can |
|
62 |
+reach the node hostname for a health check and the node is checking in |
|
63 |
+to the master with the same hostname. |
|
64 |
+ |
|
65 |
+While in this state, pods should not be scheduled to deploy on the node, |
|
66 |
+and any existing scheduled pods will be considered failed and removed. |
|
67 |
+ `, |
|
68 |
+ } |
|
69 |
+ if ready == nil { |
|
70 |
+ msg["status"] = "None" |
|
71 |
+ msg["reason"] = "There is no readiness record." |
|
72 |
+ } else { |
|
73 |
+ msg["status"] = ready.Status |
|
74 |
+ msg["reason"] = ready.Reason |
|
75 |
+ } |
|
76 |
+ env.Log.Warnm("clNodeBroken", msg) |
|
77 |
+ } |
|
78 |
+ } |
|
79 |
+ }, |
|
80 |
+ }, |
|
81 |
+ |
|
82 |
+ "ConfigContexts": { |
|
83 |
+ Description: "Test that client config contexts have no undefined references", |
|
84 |
+ Condition: func(env *discovery.Environment) (skip bool, reason string) { |
|
85 |
+ if env.ClientConfigRaw == nil { |
|
86 |
+ return true, "There is no client config file" |
|
87 |
+ } |
|
88 |
+ return false, "" |
|
89 |
+ }, |
|
90 |
+ Run: func(env *discovery.Environment) { |
|
91 |
+ cc := env.ClientConfigRaw |
|
92 |
+ current := cc.CurrentContext |
|
93 |
+ ccSuccess := false |
|
94 |
+ var ccResult log.Msg //nil |
|
95 |
+ for context := range cc.Contexts { |
|
96 |
+ result, success := TestContext(context, cc) |
|
97 |
+ msg := log.Msg{"tmpl": "For client config context '{{.context}}':{{.result}}", "context": context, "result": result} |
|
98 |
+ if context == current { |
|
99 |
+ ccResult, ccSuccess = msg, success |
|
100 |
+ } else if success { |
|
101 |
+ env.Log.Infom("clientCfgSuccess", msg) |
|
102 |
+ } else { |
|
103 |
+ env.Log.Warnm("clientCfgWarn", msg) |
|
104 |
+ } |
|
105 |
+ } |
|
106 |
+ if _, exists := cc.Contexts[current]; exists { |
|
107 |
+ ccResult["tmpl"] = ` |
|
108 |
+The current context from client config is '{{.context}}' |
|
109 |
+This will be used by default to contact your OpenShift server. |
|
110 |
+` + ccResult["tmpl"].(string) |
|
111 |
+ if ccSuccess { |
|
112 |
+ env.Log.Infom("currentccSuccess", ccResult) |
|
113 |
+ } else { |
|
114 |
+ env.Log.Errorm("currentccWarn", ccResult) |
|
115 |
+ } |
|
116 |
+ } else { // context does not exist |
|
117 |
+ env.Log.Errorm("cConUndef", log.Msg{"tmpl": ` |
|
118 |
+Your client config specifies a current context of '{{.context}}' |
|
119 |
+which is not defined; it is likely that a mistake was introduced while |
|
120 |
+manually editing your config. If this is a simple typo, you may be |
|
121 |
+able to fix it manually. |
|
122 |
+The OpenShift master creates a fresh config when it is started; it may be |
|
123 |
+useful to use this as a base if available.`, "context": current}) |
|
124 |
+ } |
|
125 |
+ }, |
|
126 |
+ }, |
|
127 |
+ |
|
128 |
+ "ClusterRegistry": { |
|
129 |
+ Description: "Check there is a working Docker registry", |
|
130 |
+ Condition: func(env *discovery.Environment) (skip bool, reason string) { |
|
131 |
+ if env.ClusterAdminFactory == nil { |
|
132 |
+ return true, "Client does not have cluster-admin access and cannot see registry objects" |
|
133 |
+ } |
|
134 |
+ return false, "" |
|
135 |
+ }, |
|
136 |
+ Run: func(env *discovery.Environment) { |
|
137 |
+ osClient, kclient, err := env.ClusterAdminFactory.Clients() |
|
138 |
+ if err != nil { |
|
139 |
+ env.Log.Errorf("clGetClientFailed", "Constructing clients failed. This should never happen. Error: (%T) %[1]v", err) |
|
140 |
+ return |
|
141 |
+ } |
|
142 |
+ // retrieve the service if it exists |
|
143 |
+ if service := getRegistryService(kclient, env.Log); service != nil { |
|
144 |
+ // Check that it actually has a pod selected that's running |
|
145 |
+ if pod := getRegistryPod(kclient, service, env.Log); pod != nil { |
|
146 |
+ // Check that an endpoint exists on the service |
|
147 |
+ if endPoint := getRegistryEndpoint(kclient, env.Log); endPoint != nil { |
|
148 |
+ // TODO: Check that endpoints on the service match the pod (hasn't been a problem yet though) |
|
149 |
+ // TODO: Check the logs for that pod for common issues (credentials, DNS resolution failure) |
|
150 |
+ // attempt to create an imagestream and see if it gets the same registry service IP from the service cache |
|
151 |
+ testRegistryImageStream(osClient, service, env.Log) |
|
152 |
+ } |
|
153 |
+ } |
|
154 |
+ } |
|
155 |
+ |
|
156 |
+ }, |
|
157 |
+ }, |
|
158 |
+} |
|
159 |
+ |
|
160 |
+func TestContext(contextName string, config *clientcmdapi.Config) (result string, success bool) { |
|
161 |
+ context, exists := config.Contexts[contextName] |
|
162 |
+ if !exists { |
|
163 |
+ return "client config context '" + contextName + "' is not defined.", false |
|
164 |
+ } |
|
165 |
+ clusterName := context.Cluster |
|
166 |
+ cluster, exists := config.Clusters[clusterName] |
|
167 |
+ if !exists { |
|
168 |
+ return fmt.Sprintf("client config context '%s' has a cluster '%s' which is not defined.", contextName, clusterName), false |
|
169 |
+ } |
|
170 |
+ authName := context.AuthInfo |
|
171 |
+ if _, exists := config.AuthInfos[authName]; !exists { |
|
172 |
+ return fmt.Sprintf("client config context '%s' has a user identity '%s' which is not defined.", contextName, authName), false |
|
173 |
+ } |
|
174 |
+ project := context.Namespace |
|
175 |
+ if project == "" { |
|
176 |
+ project = kapi.NamespaceDefault // OpenShift/k8s fills this in if missing |
|
177 |
+ } |
|
178 |
+ // TODO: actually send a request to see if can connect |
|
179 |
+ return fmt.Sprintf(` |
|
180 |
+The server URL is '%s' |
|
181 |
+The user authentication is '%s' |
|
182 |
+The current project is '%s'`, cluster.Server, authName, project), true |
|
183 |
+} |
|
184 |
+ |
|
185 |
// getRegistryService returns the "docker-registry" service from the default
// namespace, or nil (after logging an appropriate message) when it is absent
// or could not be retrieved.
func getRegistryService(kclient *client.Client, logger *log.Logger) *kapi.Service {
	service, err := kclient.Services("default").Get("docker-registry")
	// NOTE(review): comparing the dynamic type to *kerrs.StatusError matches
	// ANY API status error (e.g. Forbidden), not just "not found" — confirm
	// whether a not-found-specific check was intended here.
	if err != nil && reflect.TypeOf(err) == reflect.TypeOf(&kerrs.StatusError{}) {
		logger.Warnf("clGetRegFailed", `
There is no "docker-registry" service. This is not strictly required
to use OpenShift, however it is required for builds and its absence
probably indicates an incomplete installation of OpenShift.

Please use the 'osadm registry' command to create a registry.
`)
		return nil
	} else if err != nil {
		// Any other (non-status) error is unexpected and likely transient.
		logger.Errorf("clGetRegFailed", `
Client error while retrieving registry service. Client retrieved records
during discovery, so this is likely to be a transient error. Try running
diagnostics again. If this message persists, there may be a permissions
problem with getting records. The error was:

(%T) %[1]v`, err)
		return nil
	}
	logger.Debugf("clRegFound", "Found docker-registry service with ports %v", service.Spec.Ports)
	return service
}
|
209 |
+ |
|
210 |
+func getRegistryPod(kclient *client.Client, service *kapi.Service, logger *log.Logger) *kapi.Pod { |
|
211 |
+ pods, err := kclient.Pods("default").List(labels.SelectorFromSet(service.Spec.Selector), fields.Everything()) |
|
212 |
+ if err != nil { |
|
213 |
+ logger.Errorf("clRegListPods", "Finding pods for 'docker-registry' service failed. This should never happen. Error: (%T) %[1]v", err) |
|
214 |
+ return nil |
|
215 |
+ } else if len(pods.Items) < 1 { |
|
216 |
+ logger.Error("clRegNoPods", ` |
|
217 |
+The "docker-registry" service exists but has no associated pods, so it |
|
218 |
+is not available. Builds and deployments that use the registry will fail.`) |
|
219 |
+ return nil |
|
220 |
+ } else if len(pods.Items) > 1 { |
|
221 |
+ logger.Error("clRegNoPods", ` |
|
222 |
+The "docker-registry" service has multiple associated pods. Load-balanced |
|
223 |
+registries are not yet available, so these are likely to have incomplete |
|
224 |
+stores of images. Builds and deployments that use the registry will |
|
225 |
+fail sporadically.`) |
|
226 |
+ return nil |
|
227 |
+ } |
|
228 |
+ pod := &pods.Items[0] |
|
229 |
+ if pod.Status.Phase != kapi.PodRunning { |
|
230 |
+ logger.Errorf("clRegPodDown", ` |
|
231 |
+The "%s" pod for the "docker-registry" service is not running. |
|
232 |
+This may be transient, a scheduling error, or something else. |
|
233 |
+Builds and deployments that require the registry will fail.`, pod.ObjectMeta.Name) |
|
234 |
+ return nil |
|
235 |
+ } |
|
236 |
+ logger.Debugf("clRegPodFound", "Found docker-registry pod with name %s", pod.ObjectMeta.Name) |
|
237 |
+ return pod |
|
238 |
+} |
|
239 |
+ |
|
240 |
+func getRegistryEndpoint(kclient *client.Client, logger *log.Logger) *kapi.Endpoints { |
|
241 |
+ endPoint, err := kclient.Endpoints("default").Get("docker-registry") |
|
242 |
+ if err != nil { |
|
243 |
+ logger.Errorf("clRegGetEP", "Finding endpoints for 'docker-registry' service failed. This should never happen. Error: (%T) %[1]v", err) |
|
244 |
+ return nil |
|
245 |
+ } else if len(endPoint.Subsets) != 1 || len(endPoint.Subsets[0].Addresses) != 1 { |
|
246 |
+ logger.Warn("clRegNoEP", ` |
|
247 |
+The "docker-registry" service exists with one associated pod, but the |
|
248 |
+number of endpoints in the "docker-registry" endpoint object does not |
|
249 |
+match. This mismatch probably indicates a bug in OpenShift and it is |
|
250 |
+likely that builds and deployments that require the registry will fail.`) |
|
251 |
+ return nil |
|
252 |
+ } |
|
253 |
+ logger.Debugf("clRegPodFound", "Found docker-registry endpoint object") |
|
254 |
+ return endPoint |
|
255 |
+} |
|
256 |
+ |
|
257 |
+func testRegistryImageStream(client *osclient.Client, service *kapi.Service, logger *log.Logger) { |
|
258 |
+ imgStream, err := client.ImageStreams("default").Create(&osapi.ImageStream{ObjectMeta: kapi.ObjectMeta{GenerateName: "diagnostic-test-"}}) |
|
259 |
+ if err != nil { |
|
260 |
+ logger.Errorf("clRegISCFail", "Creating test ImageStream failed. Error: (%T) %[1]v", err) |
|
261 |
+ return |
|
262 |
+ } |
|
263 |
+ defer client.ImageStreams("default").Delete(imgStream.ObjectMeta.Name) // TODO: report if deleting fails |
|
264 |
+ imgStream, err = client.ImageStreams("default").Get(imgStream.ObjectMeta.Name) // status is filled in post-create |
|
265 |
+ if err != nil { |
|
266 |
+ logger.Errorf("clRegISCFail", "Getting created test ImageStream failed. Error: (%T) %[1]v", err) |
|
267 |
+ return |
|
268 |
+ } |
|
269 |
+ logger.Debugf("clRegISC", "Created test ImageStream: %[1]v", imgStream) |
|
270 |
+ cacheHost := strings.SplitN(imgStream.Status.DockerImageRepository, "/", 2)[0] |
|
271 |
+ serviceHost := fmt.Sprintf("%s:%d", service.Spec.PortalIP, service.Spec.Ports[0].Port) |
|
272 |
+ if cacheHost != serviceHost { |
|
273 |
+ logger.Errorm("clRegISMismatch", log.Msg{ |
|
274 |
+ "serviceHost": serviceHost, |
|
275 |
+ "cacheHost": cacheHost, |
|
276 |
+ "tmpl": ` |
|
277 |
+Diagnostics created a test ImageStream and compared the registry IP |
|
278 |
+it received to the registry IP available via the docker-registry service. |
|
279 |
+ |
|
280 |
+docker-registry : {{.serviceHost}} |
|
281 |
+ImageStream registry : {{.cacheHost}} |
|
282 |
+ |
|
283 |
+They differ, which probably means that an administrator re-created |
|
284 |
+the docker-registry service but the master has cached the old service |
|
285 |
+IP address. Builds or deployments that use ImageStreams with the wrong |
|
286 |
+docker-registry IP will fail under this condition. |
|
287 |
+ |
|
288 |
+To resolve this issue, restarting the master (to clear the cache) should |
|
289 |
+be sufficient. |
|
290 |
+`}) |
|
291 |
+ } |
|
292 |
+} |
0 | 293 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,104 @@ |
0 |
+package discovery // client |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
5 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
6 |
+ "os" |
|
7 |
+ "os/exec" |
|
8 |
+ "path/filepath" |
|
9 |
+ "runtime" |
|
10 |
+ "strings" |
|
11 |
+) |
|
12 |
+ |
|
13 |
+// ---------------------------------------------------------- |
|
14 |
+// Look for 'osc' and 'openshift' executables |
|
15 |
+func (env *Environment) DiscoverClient() error { |
|
16 |
+ var err error |
|
17 |
+ f := env.Options.ClientDiagOptions.Factory |
|
18 |
+ if config, err := f.OpenShiftClientConfig.RawConfig(); err != nil { |
|
19 |
+ env.Log.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err) |
|
20 |
+ } else { |
|
21 |
+ env.OsConfig = &config |
|
22 |
+ env.FactoryForContext[config.CurrentContext] = f |
|
23 |
+ } |
|
24 |
+ env.Log.Debug("discSearchExec", "Searching for executables in path:\n "+strings.Join(filepath.SplitList(os.Getenv("PATH")), "\n ")) //TODO for non-Linux OS |
|
25 |
+ env.OscPath = env.findExecAndLog("osc") |
|
26 |
+ if env.OscPath != "" { |
|
27 |
+ env.OscVersion, err = getExecVersion(env.OscPath, env.Log) |
|
28 |
+ } |
|
29 |
+ env.OpenshiftPath = env.findExecAndLog("openshift") |
|
30 |
+ if env.OpenshiftPath != "" { |
|
31 |
+ env.OpenshiftVersion, err = getExecVersion(env.OpenshiftPath, env.Log) |
|
32 |
+ } |
|
33 |
+ if env.OpenshiftVersion.NonZero() && env.OscVersion.NonZero() && !env.OpenshiftVersion.Eq(env.OscVersion) { |
|
34 |
+ env.Log.Warnm("discVersionMM", log.Msg{"osV": env.OpenshiftVersion.GoString(), "oscV": env.OscVersion.GoString(), |
|
35 |
+ "text": fmt.Sprintf("'openshift' version %#v does not match 'osc' version %#v; update or remove the lower version", env.OpenshiftVersion, env.OscVersion)}) |
|
36 |
+ } |
|
37 |
+ return err |
|
38 |
+} |
|
39 |
+ |
|
40 |
+// ---------------------------------------------------------- |
|
41 |
+// Look for a specific executable and log what happens |
|
42 |
+func (env *Environment) findExecAndLog(cmd string) string { |
|
43 |
+ if path := findExecFor(cmd); path != "" { |
|
44 |
+ env.Log.Infom("discExecFound", log.Msg{"command": cmd, "path": path, "tmpl": "Found '{{.command}}' at {{.path}}"}) |
|
45 |
+ return path |
|
46 |
+ } else { |
|
47 |
+ env.Log.Warnm("discExecNoPath", log.Msg{"command": cmd, "tmpl": "No '{{.command}}' executable was found in your path"}) |
|
48 |
+ } |
|
49 |
+ return "" |
|
50 |
+} |
|
51 |
+ |
|
52 |
+// ---------------------------------------------------------- |
|
53 |
+// Look in the path for an executable |
|
54 |
// ----------------------------------------------------------
// findExecFor searches the PATH for the named command, additionally trying
// a ".exe" suffix on Windows. Returns "" when nothing is found.
func findExecFor(cmd string) string {
	candidates := []string{cmd}
	if runtime.GOOS == "windows" {
		candidates = append(candidates, cmd+".exe")
	}
	for _, name := range candidates {
		if path, err := exec.LookPath(name); err == nil {
			return path
		}
	}
	return ""
}
|
67 |
+ |
|
68 |
+// ---------------------------------------------------------- |
|
69 |
+// Invoke executable's "version" command to determine version |
|
70 |
// ----------------------------------------------------------
// getExecVersion runs "<path> version" and parses a version of the form
// "name vX.Y.Z" from its combined output. The named return `version` stays
// zero when parsing fails; `err` reflects only a failure to execute the
// command, never a parse failure (which is only logged).
func getExecVersion(path string, logger *log.Logger) (version types.Version, err error) {
	cmd := exec.Command(path, "version")
	var out []byte
	out, err = cmd.CombinedOutput()
	if err == nil {
		var name string
		var x, y, z int
		// NOTE(review): Sscanf's err here shadows the named return, so a parse
		// failure is logged below but a nil error is still returned — confirm
		// this is intended (callers rely on Version.NonZero() instead).
		if scanned, err := fmt.Sscanf(string(out), "%s v%d.%d.%d", &name, &x, &y, &z); scanned > 1 {
			version = types.Version{x, y, z}
			logger.Infom("discVersion", log.Msg{"tmpl": "version of {{.command}} is {{.version}}", "command": name, "version": version.GoString()})
		} else {
			logger.Errorf("discVersErr", `
Expected version output from '%s version'
Could not parse output received:
%v
Error was: %#v`, path, string(out), err)
		}
	} else {
		// Classify the execution failure to produce a more helpful message.
		switch err.(type) {
		case *exec.Error:
			// Could not start the binary at all (missing, not executable, ...).
			logger.Errorf("discVersErr", "error in executing '%v version': %v", path, err)
		case *exec.ExitError:
			// The binary ran but exited non-zero; show a bounded output excerpt.
			logger.Errorf("discVersErr", `
Executed '%v version' which exited with an error code.
This version is likely old or broken.
Error was '%v';
Output was:
%v`, path, err.Error(), log.LimitLines(string(out), 5))
		default:
			logger.Errorf("discVersErr", "executed '%v version' but an error occurred:\n%v\nOutput was:\n%v", path, err, string(out))
		}
	}
	return version, err
}
0 | 104 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,378 @@ |
0 |
+package discovery // config |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api" |
|
5 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd" |
|
6 |
+ clientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
7 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" |
|
8 |
+ "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" |
|
9 |
+ "github.com/openshift/origin/pkg/cmd/cli/config" |
|
10 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
11 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
12 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
13 |
+ "io/ioutil" |
|
14 |
+ "os" |
|
15 |
+ "regexp" |
|
16 |
+ "strings" |
|
17 |
+) |
|
18 |
+ |
|
19 |
+/* ---------------------------------------------------------- |
|
20 |
+Look for the client config and try to read it. |
|
21 |
+ |
|
22 |
+We will look in the standard locations, alert the user to what we find |
|
23 |
+as we go along, and try to be helpful. |
|
24 |
+*/ |
|
25 |
+ |
|
26 |
+// ------------------------------------------------------------- |
|
27 |
+// Look for client config file in a number of possible locations |
|
28 |
// -------------------------------------------------------------
// ReadClientConfigFiles looks for a client config file at the path given by
// flag (or the --config flag of a subcommand, which takes precedence) and at
// the standard locations, records the first one found, and decides whether
// client diagnostics will run (env.WillCheck[ClientTarget]). When nothing is
// found it also probes master-generated admin.kubeconfig locations purely to
// give the user a helpful hint.
func (env *Environment) ReadClientConfigFiles() {
	confFlagName := options.FlagAllClientConfigName
	confFlag := env.Options.ClientConfigPath // from openshift-diagnostics --client-config
	if flags := env.Options.GlobalFlags; flags != nil {
		name := config.OpenShiftConfigFlagName
		if flag := env.Options.GlobalFlags.Lookup(name); flag != nil {
			confFlag = flag.Value.String() // from openshift-diagnostics client --config
			confFlagName = name
		}
	}
	var found bool
	rules := config.NewOpenShiftClientConfigLoadingRules()
	// The flag-specified path is tried first, then the standard precedence
	// locations; every candidate is opened so each can be reported on.
	paths := append([]string{confFlag}, rules.Precedence...)
	for index, path := range paths {
		// errmsg, when non-empty, tells openConfigFile the user explicitly
		// pointed at this path, upgrading open failures from debug to error.
		errmsg := ""
		switch index {
		case 0:
			errmsg = fmt.Sprintf("--"+confFlagName+" specified that client config should be at %s\n", path)
		case len(paths) - 1:
			// do nothing, the config wasn't found in ~
		default:
			if len(os.Getenv(config.OpenShiftConfigPathEnvVar)) != 0 {
				errmsg = fmt.Sprintf("$OPENSHIFTCONFIG specified that client config should be at %s\n", path)
			}
		}

		// Record only the first config found; later candidates are still
		// opened so their problems get logged.
		if rawConfig := openConfigFile(path, errmsg, env.Log); rawConfig != nil && !found {
			found = true
			env.ClientConfigPath = path
			env.ClientConfigRaw = rawConfig
		}
	}
	if found {
		if confFlag != "" && confFlag != env.ClientConfigPath {
			// found config but not where --config said, so don't continue discovery
			env.Log.Errorf("discCCnotFlag", `
The client configuration file was not found where the --%s flag indicated:
  %s
A config file was found at the following location:
  %s
If you wish to use this file for client configuration, you can specify it
with the --%[1]s flag, or just not specify the flag.
`, confFlagName, confFlag, env.ClientConfigPath)
		} else {
			// happy path, client config found as expected
			env.WillCheck[ClientTarget] = true
		}
	} else { // not found, decide what to do
		if confFlag != "" { // user expected conf file at specific place
			env.Log.Errorf("discNoCC", "The client configuration file was not found where --%s='%s' indicated.", confFlagName, confFlag)
		} else if !env.Options.ClientDiagOptions.MustCheck {
			env.Log.Notice("discSkipCLI", "No client config file found; client diagnostics will not be performed.")
		} else {
			// user specifically wants to troubleshoot client, but no conf file given
			env.Log.Warn("discNoCCfile", "No client config file read; OpenShift client diagnostics will use flags and default configuration.")
			env.WillCheck[ClientTarget] = true
			// Standard places the master drops an admin config, checked only to
			// point the user at a usable file.
			adminPaths := []string{
				"/etc/openshift/master/admin.kubeconfig",          // enterprise
				"/openshift.local.config/master/admin.kubeconfig", // origin systemd
				"./openshift.local.config/master/admin.kubeconfig", // origin binary
			}
			adminWarningF := `
No client config file was available; however, one exists at
  %[1]s
which is a standard location where the master generates it.
If this is what you want to use, you should copy it to a standard location
(~/.config/openshift/.config, or the current directory), or you can set the
environment variable OPENSHIFTCONFIG in your ~/.bash_profile:
  export OPENSHIFTCONFIG=%[1]s
If this is not what you want, you should obtain a config file and
place it in a standard location.
`
			// look for it in auto-generated locations when not found properly
			for _, path := range adminPaths {
				if conf := openConfigFile(path, "", env.Log); conf != nil {
					env.Log.Warnf("discCCautoPath", adminWarningF, path)
					break
				}
			}
		}
	}
}
|
110 |
+ |
|
111 |
+// ---------------------------------------------------------- |
|
112 |
+// Attempt to open file at path as client config |
|
113 |
+// If there is a problem and errmsg is set, log an error |
|
114 |
// ----------------------------------------------------------
// openConfigFile attempts to open and parse the file at path as a client
// config, returning nil on any failure. errmsg, when non-empty, explains
// why the user expected the file at this path (flag/env var); open failures
// are then logged as errors rather than debug messages. Note the branch
// order below: the errmsg=="" check must precede the specific error checks.
func openConfigFile(path string, errmsg string, logger *log.Logger) *clientcmdapi.Config {
	var err error
	var file *os.File
	if path == "" { // empty param/envvar
		return nil
	} else if file, err = os.Open(path); err == nil {
		logger.Debugm("discOpenCC", log.Msg{"tmpl": "Reading client config at {{.path}}", "path": path})
	} else if errmsg == "" {
		// Path was only a candidate location, so failure to open is routine.
		logger.Debugf("discOpenCCNo", "Could not read client config at %s:\n%#v", path, err)
	} else if os.IsNotExist(err) {
		logger.Error("discOpenCCNoExist", errmsg+"but that file does not exist.")
	} else if os.IsPermission(err) {
		logger.Error("discOpenCCNoPerm", errmsg+"but lack permission to read that file.")
	} else {
		logger.Errorf("discOpenCCErr", "%sbut there was an error opening it:\n%#v", errmsg, err)
	}
	if file != nil { // it is open for reading
		defer file.Close()
		if buffer, err := ioutil.ReadAll(file); err != nil {
			logger.Errorf("discCCReadErr", "Unexpected error while reading client config file (%s): %v", path, err)
		} else if conf, err := clientcmd.Load(buffer); err != nil {
			logger.Errorf("discCCYamlErr", `
Error reading YAML from client config file (%s):
  %v
This file may have been truncated or mis-edited.
Please fix, remove, or obtain a new client config`, file.Name(), err)
		} else {
			logger.Infom("discCCRead", log.Msg{"tmpl": `Successfully read a client config file at '{{.path}}'`, "path": path})
			/* Note, we're not going to use this config file directly.
			 * Instead, we'll defer to the openshift client code to assimilate
			 * flags, env vars, and the potential hierarchy of config files
			 * into an actual configuration that the client uses.
			 * However, for diagnostic purposes, record the first we find.
			 */
			return conf
		}
	}
	return nil
}
|
153 |
+ |
|
154 |
+/* The full client configuration may specify multiple contexts, each |
|
155 |
+ * of which could be a different server, a different user, a different |
|
156 |
+ * default project. We want to check which contexts have useful access, |
|
157 |
+ * and record those. At this point, we should already have the factory |
|
158 |
+ * for the current context. Factories embed config and a client cache, |
|
159 |
+ * and since we want to do discovery for every available context, we are |
|
160 |
+ * going to create a factory for each context. We will determine which |
|
161 |
+ * context actually has access to the default project, preferring the |
|
162 |
+ * current (default) context if it does. Connection errors should be |
|
163 |
+ * diagnosed along the way. |
|
164 |
+ */ |
|
165 |
+func (env *Environment) ConfigClient() { |
|
166 |
+ if env.OsConfig != nil { |
|
167 |
+ // TODO: run these in parallel, with a time limit so connection timeouts don't take forever |
|
168 |
+ for cname, context := range env.OsConfig.Contexts { |
|
169 |
+ // set context, create factory, see what's available |
|
170 |
+ if env.FactoryForContext[cname] == nil { |
|
171 |
+ //config := clientcmd.NewNonInteractiveClientConfig(env.Factory.OpenShiftClientConfig, cname, &clientcmd.ConfigOverrides{}) |
|
172 |
+ config := clientcmd.NewNonInteractiveClientConfig(*env.OsConfig, cname, &clientcmd.ConfigOverrides{}) |
|
173 |
+ f := osclientcmd.NewFactory(config) |
|
174 |
+ //f.BindFlags(env.Flags.OpenshiftFlags) |
|
175 |
+ env.FactoryForContext[cname] = f |
|
176 |
+ } |
|
177 |
+ if access := getContextAccess(env.FactoryForContext[cname], cname, context, env.Log); access != nil { |
|
178 |
+ env.AccessForContext[cname] = access |
|
179 |
+ if access.ClusterAdmin && (cname == env.OsConfig.CurrentContext || env.ClusterAdminFactory == nil) { |
|
180 |
+ env.ClusterAdminFactory = env.FactoryForContext[cname] |
|
181 |
+ } |
|
182 |
+ } |
|
183 |
+ } |
|
184 |
+ } |
|
185 |
+} |
|
186 |
+ |
|
187 |
+// for now, only try to determine what namespaces a user can see |
|
188 |
+func getContextAccess(factory *osclientcmd.Factory, ctxName string, ctx clientcmdapi.Context, logger *log.Logger) *ContextAccess { |
|
189 |
+ // start by getting ready to log the result |
|
190 |
+ msgText := "Testing client config context {{.context}}\nServer: {{.server}}\nUser: {{.user}}\n\n" |
|
191 |
+ msg := log.Msg{"id": "discCCctx", "tmpl": msgText} |
|
192 |
+ if config, err := factory.OpenShiftClientConfig.RawConfig(); err != nil { |
|
193 |
+ logger.Errorf("discCCstart", "Could not read client config: (%T) %[1]v", err) |
|
194 |
+ return nil |
|
195 |
+ } else { |
|
196 |
+ msg["context"] = ctxName |
|
197 |
+ msg["server"] = config.Clusters[ctx.Cluster].Server |
|
198 |
+ msg["user"] = ctx.AuthInfo |
|
199 |
+ } |
|
200 |
+ // actually go and request project list from the server |
|
201 |
+ if osclient, _, err := factory.Clients(); err != nil { |
|
202 |
+ logger.Errorf("discCCctxClients", "Failed to create client during discovery with error:\n(%T) %[1]v\nThis is probably an OpenShift bug.", err) |
|
203 |
+ return nil |
|
204 |
+ } else if projects, err := osclient.Projects().List(labels.Everything(), fields.Everything()); err == nil { // success! |
|
205 |
+ list := projects.Items |
|
206 |
+ if len(list) == 0 { |
|
207 |
+ msg["tmpl"] = msgText + "Successfully requested project list, but it is empty, so user has no access to anything." |
|
208 |
+ msg["projects"] = make([]string, 0) |
|
209 |
+ logger.Infom("discCCctxSuccess", msg) |
|
210 |
+ return nil |
|
211 |
+ } |
|
212 |
+ access := &ContextAccess{Projects: make([]string, len(list))} |
|
213 |
+ for i, project := range list { |
|
214 |
+ access.Projects[i] = project.Name |
|
215 |
+ if project.Name == kapi.NamespaceDefault { |
|
216 |
+ access.ClusterAdmin = true |
|
217 |
+ } |
|
218 |
+ } |
|
219 |
+ if access.ClusterAdmin { |
|
220 |
+ msg["tmpl"] = msgText + "Successfully requested project list; has access to default project, so assumed to be a cluster-admin" |
|
221 |
+ logger.Infom("discCCctxSuccess", msg) |
|
222 |
+ } else { |
|
223 |
+ msg["tmpl"] = msgText + "Successfully requested project list; has access to project(s): {{.projectStr}}" |
|
224 |
+ msg["projects"] = access.Projects |
|
225 |
+ msg["projectStr"] = strings.Join(access.Projects, ", ") |
|
226 |
+ logger.Infom("discCCctxSuccess", msg) |
|
227 |
+ } |
|
228 |
+ return access |
|
229 |
+ } else { // something went wrong, so diagnose it |
|
230 |
+ noResolveRx := regexp.MustCompile("dial tcp: lookup (\\S+): no such host") |
|
231 |
+ unknownCaMsg := "x509: certificate signed by unknown authority" |
|
232 |
+ unneededCaMsg := "specifying a root certificates file with the insecure flag is not allowed" |
|
233 |
+ invalidCertNameRx := regexp.MustCompile("x509: certificate is valid for (\\S+, )+not (\\S+)") |
|
234 |
+ connRefusedRx := regexp.MustCompile("dial tcp (\\S+): connection refused") |
|
235 |
+ connTimeoutRx := regexp.MustCompile("dial tcp (\\S+): (?:connection timed out|i/o timeout)") |
|
236 |
+ unauthenticatedMsg := `403 Forbidden: Forbidden: "/osapi/v1beta1/projects?namespace=" denied by default` |
|
237 |
+ unauthorizedRx := regexp.MustCompile("401 Unauthorized: Unauthorized$") |
|
238 |
+ |
|
239 |
+ malformedHTTPMsg := "malformed HTTP response" |
|
240 |
+ malformedTLSMsg := "tls: oversized record received with length" |
|
241 |
+ |
|
242 |
+ // interpret the error message for mere mortals |
|
243 |
+ errm := err.Error() |
|
244 |
+ var reason, errId string |
|
245 |
+ switch { |
|
246 |
+ case noResolveRx.MatchString(errm): |
|
247 |
+ errId, reason = "clientNoResolve", ` |
|
248 |
+This usually means that the hostname does not resolve to an IP. |
|
249 |
+Hostnames should usually be resolved via DNS or an /etc/hosts file. |
|
250 |
+Ensure that the hostname resolves correctly from your host before proceeding. |
|
251 |
+Of course, your config could also simply have the wrong hostname specified. |
|
252 |
+` |
|
253 |
+ case strings.Contains(errm, unknownCaMsg): |
|
254 |
+ errId, reason = "clientUnknownCa", ` |
|
255 |
+This means that we cannot validate the certificate in use by the |
|
256 |
+OpenShift API server, so we cannot securely communicate with it. |
|
257 |
+Connections could be intercepted and your credentials stolen. |
|
258 |
+ |
|
259 |
+Since the server certificate we see when connecting is not validated |
|
260 |
+by public certificate authorities (CAs), you probably need to specify a |
|
261 |
+certificate from a private CA to validate the connection. |
|
262 |
+ |
|
263 |
+Your config may be specifying the wrong CA cert, or none, or there |
|
264 |
+could actually be a man-in-the-middle attempting to intercept your |
|
265 |
+connection. If you are unconcerned about any of this, you can add the |
|
266 |
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification, |
|
267 |
+but this is risky and should not be necessary. |
|
268 |
+** Connections could be intercepted and your credentials stolen. ** |
|
269 |
+` |
|
270 |
+ case strings.Contains(errm, unneededCaMsg): |
|
271 |
+ errId, reason = "clientUnneededCa", ` |
|
272 |
+This means that for client connections to the OpenShift API server, you |
|
273 |
+(or your kubeconfig) specified both a validating certificate authority |
|
274 |
+and that the client should bypass connection security validation. |
|
275 |
+ |
|
276 |
+This is not allowed because it is likely to be a mistake. |
|
277 |
+ |
|
278 |
+If you want to use --insecure-skip-tls-verify to bypass security (which |
|
279 |
+is usually a bad idea anyway), then you need to also clear the CA cert |
|
280 |
+from your command line options or kubeconfig file(s). Of course, it |
|
281 |
+would be far better to obtain and use a correct CA cert. |
|
282 |
+` |
|
283 |
+ case invalidCertNameRx.MatchString(errm): |
|
284 |
+ match := invalidCertNameRx.FindStringSubmatch(errm) |
|
285 |
+ serverHost := match[len(match)-1] |
|
286 |
+ errId, reason = "clientInvCertName", fmt.Sprintf(` |
|
287 |
+This means that the certificate in use by the OpenShift API server |
|
288 |
+(master) does not match the hostname by which you are addressing it: |
|
289 |
+ %s |
|
290 |
+so a secure connection is not allowed. In theory, this *could* mean that |
|
291 |
+someone is intercepting your connection and presenting a certificate |
|
292 |
+that is valid but for a different server, which is why secure validation |
|
293 |
+fails in this case. |
|
294 |
+ |
|
295 |
+However, the most likely explanation is that the server certificate |
|
296 |
+needs to be updated to include the name you are using to reach it. |
|
297 |
+ |
|
298 |
+If the OpenShift server is generating its own certificates (which |
|
299 |
+is default), then the --public-master flag on the OpenShift master is |
|
300 |
+usually the easiest way to do this. If you need something more complicated |
|
301 |
+(for instance, multiple public addresses for the API, or your own CA), |
|
302 |
+then you will need to custom-generate the server certificate with the |
|
303 |
+right names yourself. |
|
304 |
+ |
|
305 |
+If you are unconcerned about any of this, you can add the |
|
306 |
+--insecure-skip-tls-verify flag to bypass secure (TLS) verification, |
|
307 |
+but this is risky and should not be necessary. |
|
308 |
+** Connections could be intercepted and your credentials stolen. ** |
|
309 |
+`, serverHost) |
|
310 |
+ case connRefusedRx.MatchString(errm): |
|
311 |
+ errId, reason = "clientInvCertName", ` |
|
312 |
+This means that when we tried to connect to the OpenShift API |
|
313 |
+server (master), we reached the host, but nothing accepted the port |
|
314 |
+connection. This could mean that the OpenShift master is stopped, or |
|
315 |
+that a firewall or security policy is blocking access at that port. |
|
316 |
+ |
|
317 |
+You will not be able to connect or do anything at all with OpenShift |
|
318 |
+until this server problem is resolved or you specify a corrected |
|
319 |
+server address.` |
|
320 |
+ case connTimeoutRx.MatchString(errm): |
|
321 |
+ errId, reason = "clientConnTimeout", ` |
|
322 |
+This means that when we tried to connect to the OpenShift API server |
|
323 |
+(master), we could not reach the host at all. |
|
324 |
+* You may have specified the wrong host address. |
|
325 |
+* This could mean the host is completely unavailable (down). |
|
326 |
+* This could indicate a routing problem or a firewall that simply |
|
327 |
+ drops requests rather than responding by reseting the connection. |
|
328 |
+* It does not generally mean that DNS name resolution failed (which |
|
329 |
+ would be a different error) though the problem could be that it |
|
330 |
+ gave the wrong address.` |
|
331 |
+ case strings.Contains(errm, malformedHTTPMsg): |
|
332 |
+ errId, reason = "clientMalformedHTTP", ` |
|
333 |
+This means that when we tried to connect to the OpenShift API server |
|
334 |
+(master) with a plain HTTP connection, the server did not speak |
|
335 |
+HTTP back to us. The most common explanation is that a secure server |
|
336 |
+is listening but you specified an http: connection instead of https:. |
|
337 |
+There could also be another service listening at the intended port |
|
338 |
+speaking some other protocol entirely. |
|
339 |
+ |
|
340 |
+You will not be able to connect or do anything at all with OpenShift |
|
341 |
+until this server problem is resolved or you specify a corrected |
|
342 |
+server address.` |
|
343 |
+ case strings.Contains(errm, malformedTLSMsg): |
|
344 |
+ errId, reason = "clientMalformedTLS", ` |
|
345 |
+This means that when we tried to connect to the OpenShift API server |
|
346 |
+(master) with a secure HTTPS connection, the server did not speak |
|
347 |
+HTTPS back to us. The most common explanation is that the server |
|
348 |
+listening at that port is not the secure server you expected - it |
|
349 |
+may be a non-secure HTTP server or the wrong service may be |
|
350 |
+listening there, or you may have specified an incorrect port. |
|
351 |
+ |
|
352 |
+You will not be able to connect or do anything at all with OpenShift |
|
353 |
+until this server problem is resolved or you specify a corrected |
|
354 |
+server address.` |
|
355 |
+ case strings.Contains(errm, unauthenticatedMsg): |
|
356 |
+ errId, reason = "clientUnauthn", ` |
|
357 |
+This means that when we tried to make a request to the OpenShift API |
|
358 |
+server, your kubeconfig did not present valid credentials to |
|
359 |
+authenticate your client. Credentials generally consist of a client |
|
360 |
+key/certificate or an access token. Your kubeconfig may not have |
|
361 |
+presented any, or they may be invalid.` |
|
362 |
+ case unauthorizedRx.MatchString(errm): |
|
363 |
+ errId, reason = "clientUnauthz", ` |
|
364 |
+This means that when we tried to make a request to the OpenShift API |
|
365 |
+server, the request required credentials that were not presented. |
|
366 |
+This can happen when an authentication token expires. Try logging in |
|
367 |
+with this user again.` |
|
368 |
+ default: |
|
369 |
+ errId, reason = "clientUnknownConnErr", `Diagnostics does not have an explanation for what this means. Please report this error so one can be added.` |
|
370 |
+ } |
|
371 |
+ errMsg := fmt.Sprintf("(%T) %[1]v", err) |
|
372 |
+ msg["tmpl"] = msgText + errMsg + reason |
|
373 |
+ msg["errMsg"] = errMsg |
|
374 |
+ logger.Errorm(errId, msg) |
|
375 |
+ } |
|
376 |
+ return nil |
|
377 |
+} |
0 | 378 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,80 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ kclientcmdapi "github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd/api" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
5 |
+ mconfigapi "github.com/openshift/origin/pkg/cmd/server/api" |
|
6 |
+ osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd" |
|
7 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
8 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
9 |
+) |
|
10 |
+ |
|
11 |
// One env instance is created and filled in by discovery.
// Then it should be considered immutable while diagnostics use it.
type Environment struct {
	// the options that were set by command invocation
	Options *options.AllDiagnosticsOptions

	// used to print discovery and diagnostic logs
	Log *log.Logger

	// do we have enough config to diagnose master,node,client?
	WillCheck map[Target]bool

	// general system info
	HasBash      bool                         // for non-Linux clients, will not have bash...
	HasSystemd   bool                         // not even all Linux has systemd
	SystemdUnits map[string]types.SystemdUnit // list of relevant units present on system

	// outcome from looking for executables
	OscPath          string
	OscVersion       types.Version
	OpenshiftPath    string
	OpenshiftVersion types.Version

	// saved results from client discovery
	ClientConfigPath    string                          // first client config file found, if any
	ClientConfigRaw     *kclientcmdapi.Config           // available to analyze ^^
	OsConfig            *kclientcmdapi.Config           // actual merged client configuration
	FactoryForContext   map[string]*osclientcmd.Factory // one for each known context
	AccessForContext    map[string]*ContextAccess       // one for each context that has access to anything
	ClusterAdminFactory *osclientcmd.Factory            // factory we will use for cluster-admin access (could easily be nil)

	// saved results from master discovery
	MasterConfig *mconfigapi.MasterConfig // actual config determined from flags/file

	// saved results from node discovery
	NodeConfig *mconfigapi.NodeConfig // actual config determined from flags/file
}
|
48 |
+ |
|
49 |
// ContextAccess records what a single client-config context was able to
// reach: the projects visible to it, and whether it appears to have
// cluster-admin privileges.
type ContextAccess struct {
	Projects     []string
	ClusterAdmin bool // has access to see stuff only cluster-admin should
}
|
53 |
+ |
|
54 |
+func NewEnvironment(opts *options.AllDiagnosticsOptions, logger *log.Logger) *Environment { |
|
55 |
+ return &Environment{ |
|
56 |
+ Options: opts, |
|
57 |
+ Log: logger, |
|
58 |
+ SystemdUnits: make(map[string]types.SystemdUnit), |
|
59 |
+ WillCheck: make(map[Target]bool), |
|
60 |
+ FactoryForContext: make(map[string]*osclientcmd.Factory), |
|
61 |
+ AccessForContext: make(map[string]*ContextAccess), |
|
62 |
+ } |
|
63 |
+} |
|
64 |
+ |
|
65 |
+// helpful translator |
|
66 |
+func (env *Environment) DefaultFactory() *osclientcmd.Factory { |
|
67 |
+ if env.FactoryForContext != nil && env.OsConfig != nil { // no need to panic if missing... |
|
68 |
+ return env.FactoryForContext[env.OsConfig.CurrentContext] |
|
69 |
+ } |
|
70 |
+ return nil |
|
71 |
+} |
|
72 |
+ |
|
73 |
// Target identifies which OpenShift component a diagnostic check applies to.
type Target string

const (
	ClientTarget Target = "client"
	MasterTarget Target = "master"
	NodeTarget   Target = "node"
)
0 | 80 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,79 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ mconfigapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
5 |
+) |
|
6 |
+ |
|
7 |
// StandardMasterConfPath is the default location checked for a master config
// file when none is specified explicitly.
const StandardMasterConfPath string = "/etc/openshift/master/master-config.yaml"

// DiscoverMaster determines whether a master configuration is available and,
// on success, enables master diagnostics via env.WillCheck[MasterTarget].
// Resolution order:
//   - an explicitly specified --master-config file: it must load, and master
//     checks are enabled only if it does;
//   - otherwise, when the subcommand does not force a master check
//     (options.MustCheck false), the standard location is tried quietly;
//   - otherwise flags are assumed to describe the master and checks are
//     enabled regardless of whether a config could be built.
func (env *Environment) DiscoverMaster() {
	// first, determine if we even have a master config
	options := env.Options.MasterDiagOptions
	if env.Options.MasterConfigPath != "" { // specified master conf, it has to load or we choke
		options.MasterStartOptions.MasterArgs = start.NewDefaultMasterArgs() // and don't set any args
		if env.tryMasterConfig(true) {
			env.WillCheck[MasterTarget] = true
		}
	} else { // user did not indicate config file
		env.Log.Debug("discMCnofile", "No top-level --master-config file specified")
		if !options.MustCheck {
			// general command, user couldn't indicate server flags;
			// look for master config in standard location(s)
			env.tryStandardMasterConfig() // or give up.
		} else { // assume user provided flags like actual master.
			env.tryMasterConfig(true)
			env.WillCheck[MasterTarget] = true // regardless
		}
	}
	if !env.WillCheck[MasterTarget] {
		env.Log.Notice("discMCnone", "No master config found; master diagnostics will not be performed.")
	}
}
|
32 |
+ |
|
33 |
+func (env *Environment) tryMasterConfig(errOnFail bool) bool /* worked? */ { |
|
34 |
+ options := env.Options.MasterDiagOptions.MasterStartOptions |
|
35 |
+ logOnFail := env.Log.Debugf |
|
36 |
+ if errOnFail { |
|
37 |
+ logOnFail = env.Log.Errorf |
|
38 |
+ } |
|
39 |
+ if err := options.Complete(); err != nil { |
|
40 |
+ logOnFail("discMCstart", "Could not read master config options: (%T) %[1]v", err) |
|
41 |
+ return false |
|
42 |
+ } else if err = options.Validate([]string{}); err != nil { |
|
43 |
+ logOnFail("discMCstart", "Could not read master config options: (%T) %[1]v", err) |
|
44 |
+ return false |
|
45 |
+ } |
|
46 |
+ var err error |
|
47 |
+ if path := options.ConfigFile; path != "" { |
|
48 |
+ env.Log.Debugf("discMCfile", "Looking for master config file at '%s'", path) |
|
49 |
+ if env.MasterConfig, err = mconfigapilatest.ReadAndResolveMasterConfig(path); err != nil { |
|
50 |
+ logOnFail("discMCfail", "Could not read master config file '%s':\n(%T) %[2]v", path, err) |
|
51 |
+ return false |
|
52 |
+ } |
|
53 |
+ env.Log.Infof("discMCfound", "Found a master config file:\n%[1]s", path) |
|
54 |
+ return true |
|
55 |
+ } else { |
|
56 |
+ if env.MasterConfig, err = options.MasterArgs.BuildSerializeableMasterConfig(); err != nil { |
|
57 |
+ logOnFail("discMCopts", "Could not build a master config from flags:\n(%T) %[1]v", err) |
|
58 |
+ return false |
|
59 |
+ } |
|
60 |
+ env.Log.Infof("discMCfound", "No master config file, using any flags for configuration.") |
|
61 |
+ } |
|
62 |
+ return false |
|
63 |
+} |
|
64 |
+ |
|
65 |
+func (env *Environment) tryStandardMasterConfig() bool /* worked? */ { |
|
66 |
+ env.Log.Debug("discMCnoflags", "No master config flags specified, will try standard config location") |
|
67 |
+ options := env.Options.MasterDiagOptions.MasterStartOptions |
|
68 |
+ options.ConfigFile = StandardMasterConfPath |
|
69 |
+ options.MasterArgs = start.NewDefaultMasterArgs() |
|
70 |
+ if env.tryMasterConfig(false) { |
|
71 |
+ env.Log.Debug("discMCdefault", "Using master config file at "+StandardMasterConfPath) |
|
72 |
+ env.WillCheck[MasterTarget] = true |
|
73 |
+ return true |
|
74 |
+ } else { // otherwise, we just don't do master diagnostics |
|
75 |
+ env.Log.Debugf("discMCnone", "Not using master config file at "+StandardMasterConfPath+" - will not do master diagnostics.") |
|
76 |
+ } |
|
77 |
+ return false |
|
78 |
+} |
0 | 79 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,80 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ mconfigapilatest "github.com/openshift/origin/pkg/cmd/server/api/latest" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
5 |
+) |
|
6 |
+ |
|
7 |
// StandardNodeConfPath is the default location checked for a node config
// file when none is specified explicitly.
const StandardNodeConfPath string = "/etc/openshift/node/node-config.yaml"

// DiscoverNode determines whether a node configuration is available and,
// on success, enables node diagnostics via env.WillCheck[NodeTarget].
// Resolution mirrors DiscoverMaster: explicit --node-config file first,
// then (unless the subcommand forces a node check) the standard location,
// otherwise node flags with checks enabled regardless.
func (env *Environment) DiscoverNode() {
	// first, determine if we even have a node config
	options := env.Options.NodeDiagOptions
	if env.Options.NodeConfigPath != "" { // specified node conf, it has to load or we choke
		options.NodeStartOptions.NodeArgs = start.NewDefaultNodeArgs() // and don't set any args
		if env.tryNodeConfig(true) {
			env.WillCheck[NodeTarget] = true
		}
	} else { // user did not indicate config file
		env.Log.Debug("discNCnofile", "No node config file specified")
		if !options.MustCheck {
			// general command, user couldn't indicate server flags;
			// look for node config in standard location(s)
			env.tryStandardNodeConfig() // or give up.
		} else { // assume user provided flags like actual node.
			env.tryNodeConfig(true)
			env.WillCheck[NodeTarget] = true // regardless
		}
	}
	if !env.WillCheck[NodeTarget] {
		env.Log.Notice("discNCnone", "No node config found; node diagnostics will not be performed.")
	}
}
|
32 |
+ |
|
33 |
+func (env *Environment) tryNodeConfig(errOnFail bool) bool /* worked */ { |
|
34 |
+ options := env.Options.NodeDiagOptions.NodeStartOptions |
|
35 |
+ //pretty.Println("nodeconfig options are:", options) |
|
36 |
+ logOnFail := env.Log.Debugf |
|
37 |
+ if errOnFail { |
|
38 |
+ logOnFail = env.Log.Errorf |
|
39 |
+ } |
|
40 |
+ if err := options.Complete(); err != nil { |
|
41 |
+ logOnFail("discNCstart", "Could not read node config options: (%T) %[1]v", err) |
|
42 |
+ return false |
|
43 |
+ } else if err = options.Validate([]string{}); err != nil { |
|
44 |
+ logOnFail("discNCstart", "Could not read node config options: (%T) %[1]v", err) |
|
45 |
+ return false |
|
46 |
+ } |
|
47 |
+ var err error |
|
48 |
+ if path := options.ConfigFile; path != "" { |
|
49 |
+ env.Log.Debugf("discNCfile", "Looking for node config file at '%s'", path) |
|
50 |
+ if env.NodeConfig, err = mconfigapilatest.ReadAndResolveNodeConfig(path); err != nil { |
|
51 |
+ logOnFail("discNCfail", "Could not read node config file '%s':\n(%T) %[2]v", path, err) |
|
52 |
+ return false |
|
53 |
+ } |
|
54 |
+ env.Log.Infof("discNCfound", "Found a node config file:\n%[1]s", path) |
|
55 |
+ return true |
|
56 |
+ } else { |
|
57 |
+ if env.NodeConfig, err = options.NodeArgs.BuildSerializeableNodeConfig(); err != nil { |
|
58 |
+ logOnFail("discNCopts", "Could not build a node config from flags:\n(%T) %[1]v", err) |
|
59 |
+ return false |
|
60 |
+ } |
|
61 |
+ env.Log.Infof("discNCfound", "No node config file, using any flags for configuration.") |
|
62 |
+ } |
|
63 |
+ return false |
|
64 |
+} |
|
65 |
+ |
|
66 |
+func (env *Environment) tryStandardNodeConfig() bool /*worked*/ { |
|
67 |
+ env.Log.Debug("discNCnoflags", "No node config flags specified, will try standard config location") |
|
68 |
+ options := env.Options.NodeDiagOptions.NodeStartOptions |
|
69 |
+ options.ConfigFile = StandardNodeConfPath |
|
70 |
+ options.NodeArgs = start.NewDefaultNodeArgs() |
|
71 |
+ if env.tryNodeConfig(false) { |
|
72 |
+ env.Log.Debug("discNCdefault", "Using node config file at "+StandardNodeConfPath) |
|
73 |
+ env.WillCheck[NodeTarget] = true |
|
74 |
+ return true |
|
75 |
+ } else { // otherwise, we just don't do node diagnostics |
|
76 |
+ env.Log.Debugf("discNCnone", "Not using node config file at "+StandardNodeConfPath+" - will not do node diagnostics.") |
|
77 |
+ } |
|
78 |
+ return false |
|
79 |
+} |
0 | 80 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,19 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "os/exec" |
|
4 |
+ "runtime" |
|
5 |
+) |
|
6 |
+ |
|
7 |
+// ---------------------------------------------------------- |
|
8 |
+// Determine what we need to about the OS |
|
9 |
+func (env *Environment) DiscoverOperatingSystem() { |
|
10 |
+ if runtime.GOOS == "linux" { |
|
11 |
+ if _, err := exec.LookPath("systemctl"); err == nil { |
|
12 |
+ env.HasSystemd = true |
|
13 |
+ } |
|
14 |
+ if _, err := exec.LookPath("/bin/bash"); err == nil { |
|
15 |
+ env.HasBash = true |
|
16 |
+ } |
|
17 |
+ } |
|
18 |
+} |
0 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,58 @@ |
0 |
+package discovery |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
5 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
6 |
+ "os/exec" |
|
7 |
+ "strings" |
|
8 |
+) |
|
9 |
+ |
|
10 |
+// ---------------------------------------------------------- |
|
11 |
+// Determine what systemd units are relevant, if any |
|
12 |
+// Run after determining whether systemd and openshift are present. |
|
13 |
+func (env *Environment) DiscoverSystemd() { |
|
14 |
+ env.Log.Notice("discBegin", "Beginning systemd discovery") |
|
15 |
+ for _, name := range []string{"openshift", "openshift-master", "openshift-node", "openshift-sdn-master", "openshift-sdn-node", "docker", "openvswitch", "iptables", "etcd", "kubernetes"} { |
|
16 |
+ if env.SystemdUnits[name] = discoverSystemdUnit(name, env.Log); env.SystemdUnits[name].Exists { |
|
17 |
+ env.Log.Debugm("discUnit", log.Msg{"tmpl": "Saw systemd unit {{.unit}}", "unit": name}) |
|
18 |
+ } |
|
19 |
+ } |
|
20 |
+ env.Log.Debugf("discUnits", "%v", env.SystemdUnits) |
|
21 |
+} |
|
22 |
+ |
|
23 |
+func discoverSystemdUnit(name string, logger *log.Logger) types.SystemdUnit { |
|
24 |
+ unit := types.SystemdUnit{Name: name, Exists: false} |
|
25 |
+ if output, err := exec.Command("systemctl", "show", name).Output(); err != nil { |
|
26 |
+ logger.Errorm("discCtlErr", log.Msg{"tmpl": "Error running `systemctl show {{.unit}}`: {{.error}}\nCannot analyze systemd units.", "unit": name, "error": err.Error()}) |
|
27 |
+ } else { |
|
28 |
+ attr := make(map[string]string) |
|
29 |
+ for _, line := range strings.Split(string(output), "\n") { |
|
30 |
+ elements := strings.SplitN(line, "=", 2) // Looking for "Foo=Bar" settings |
|
31 |
+ if len(elements) == 2 { // found that, record it... |
|
32 |
+ attr[elements[0]] = elements[1] |
|
33 |
+ } |
|
34 |
+ } |
|
35 |
+ if val := attr["LoadState"]; val != "loaded" { |
|
36 |
+ logger.Debugm("discUnitENoExist", log.Msg{"tmpl": "systemd unit '{{.unit}}' does not exist. LoadState is '{{.state}}'", "unit": name, "state": val}) |
|
37 |
+ return unit // doesn't exist - leave everything blank |
|
38 |
+ } else { |
|
39 |
+ unit.Exists = true |
|
40 |
+ } |
|
41 |
+ if val := attr["UnitFileState"]; val == "enabled" { |
|
42 |
+ logger.Debugm("discUnitEnabled", log.Msg{"tmpl": "systemd unit '{{.unit}}' is enabled - it will start automatically at boot.", "unit": name}) |
|
43 |
+ unit.Enabled = true |
|
44 |
+ } else { |
|
45 |
+ logger.Debugm("discUnitNoEnable", log.Msg{"tmpl": "systemd unit '{{.unit}}' is not enabled - it does not start automatically at boot. UnitFileState is '{{.state}}'", "unit": name, "state": val}) |
|
46 |
+ } |
|
47 |
+ if val := attr["ActiveState"]; val == "active" { |
|
48 |
+ logger.Debugm("discUnitActive", log.Msg{"tmpl": "systemd unit '{{.unit}}' is currently running", "unit": name}) |
|
49 |
+ unit.Active = true |
|
50 |
+ } else { |
|
51 |
+ logger.Debugm("discUnitNoActive", log.Msg{"unit": name, "state": val, "exit": unit.ExitStatus, |
|
52 |
+ "tmpl": "systemd unit '{{.unit}}' is not currently running. ActiveState is '{{.state}}'; exit code was {{.exit}}."}) |
|
53 |
+ } |
|
54 |
+ fmt.Sscanf(attr["StatusErrno"], "%d", &unit.ExitStatus) // ignore errors... |
|
55 |
+ } |
|
56 |
+ return unit |
|
57 |
+} |
0 | 58 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,33 @@ |
0 |
+package log |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "encoding/json" |
|
4 |
+ "fmt" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// jsonLogger emits log messages as a single JSON array of message objects
// on out. State flags track whether the array has been opened by a first
// Write and whether Finish has closed it.
type jsonLogger struct {
	out         io.Writer
	logStarted  bool // the opening "[" and at least one message have been written
	logFinished bool // Finish has emitted closing output
}
|
13 |
+ |
|
14 |
+func (j *jsonLogger) Write(l Level, msg Msg) { |
|
15 |
+ if j.logStarted { |
|
16 |
+ fmt.Fprintln(j.out, ",") |
|
17 |
+ } else { |
|
18 |
+ fmt.Fprintln(j.out, "[") |
|
19 |
+ } |
|
20 |
+ j.logStarted = true |
|
21 |
+ msg["level"] = l.Name |
|
22 |
+ b, _ := json.MarshalIndent(msg, " ", " ") |
|
23 |
+ fmt.Print(" " + string(b)) |
|
24 |
+} |
|
25 |
+func (j *jsonLogger) Finish() { |
|
26 |
+ if j.logStarted { |
|
27 |
+ fmt.Fprintln(j.out, "\n]") |
|
28 |
+ } else if !j.logFinished { |
|
29 |
+ fmt.Fprintln(j.out, "[]") |
|
30 |
+ } |
|
31 |
+ j.logFinished = true |
|
32 |
+} |
0 | 33 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,205 @@ |
0 |
+package log |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "bytes" |
|
4 |
+ "errors" |
|
5 |
+ "fmt" |
|
6 |
+ ct "github.com/daviddengcn/go-colortext" |
|
7 |
+ "io" |
|
8 |
+ "strings" |
|
9 |
+ "text/template" |
|
10 |
+) |
|
11 |
+ |
|
12 |
// Level describes a message severity: numeric rank (lower is more severe),
// a short name used in structured output, a text prefix for human-readable
// output, and terminal color attributes.
type Level struct {
	Level  int
	Name   string
	Prefix string
	Color  ct.Color
	Bright bool
}

// Logger filters messages by severity, counts warnings and errors seen,
// and delegates actual formatting to a format-specific backend.
type Logger struct {
	logger       loggerType
	level        Level // minimum severity that will be written
	warningsSeen int
	errorsSeen   int
}

// Internal type to deal with different log formats
type loggerType interface {
	Write(Level, Msg)
	Finish()
}
|
32 |
+ |
|
33 |
+func NewLogger(setLevel int, setFormat string, out io.Writer) (*Logger, error) { |
|
34 |
+ |
|
35 |
+ var logger loggerType |
|
36 |
+ switch setFormat { |
|
37 |
+ case "json": |
|
38 |
+ logger = &jsonLogger{out: out} |
|
39 |
+ case "yaml": |
|
40 |
+ logger = &yamlLogger{out: out} |
|
41 |
+ case "text": |
|
42 |
+ logger = newTextLogger(out) |
|
43 |
+ default: |
|
44 |
+ return nil, errors.New("Output format must be one of: text, json, yaml") |
|
45 |
+ } |
|
46 |
+ |
|
47 |
+ var err error = nil |
|
48 |
+ level := DebugLevel |
|
49 |
+ switch setLevel { |
|
50 |
+ case 0: |
|
51 |
+ level = ErrorLevel |
|
52 |
+ case 1: |
|
53 |
+ level = WarnLevel |
|
54 |
+ case 2: |
|
55 |
+ level = NoticeLevel |
|
56 |
+ case 3: |
|
57 |
+ level = InfoLevel |
|
58 |
+ case 4: |
|
59 |
+ // Debug, also default for invalid numbers below |
|
60 |
+ default: |
|
61 |
+ err = errors.New("Invalid diagnostic level; must be 0-4") |
|
62 |
+ } |
|
63 |
+ return &Logger{ |
|
64 |
+ logger: logger, |
|
65 |
+ level: level, |
|
66 |
+ }, err |
|
67 |
+} |
|
68 |
+ |
|
69 |
// Msg is a map message type to throw type safety and method signatures out the window:
type Msg map[string]interface{}

/* a Msg can be expected to have the following entries:
 * "id": an identifier unique to the message being logged, intended for json/yaml output
 *       so that automation can recognize specific messages without trying to parse them.
 * "text": human-readable message text
 * "tmpl": a template string as understood by text/template that can use any of the other
 *       entries in this Msg as inputs. This is removed, evaluated, and the result is
 *       placed in "text". If there is an error during evaluation, the error is placed
 *       in "templateErr", the original id of the message is stored in "templateId",
 *       and the Msg id is changed to "tmplErr". Of course, this should never happen
 *       if there are no mistakes in the calling code.
 */
|
83 |
+ |
|
84 |
// The five severities, most to least severe; Level numbers index them.
var (
	ErrorLevel  = Level{0, "error", "ERROR: ", ct.Red, true}    // Something is definitely wrong
	WarnLevel   = Level{1, "warn", "WARN:  ", ct.Yellow, true}  // Likely to be an issue but maybe not
	NoticeLevel = Level{2, "note", "[Note] ", ct.White, false}  // Introductory / summary
	InfoLevel   = Level{3, "info", "Info:  ", ct.None, false}   // Just informational
	DebugLevel  = Level{4, "debug", "debug: ", ct.None, false}  // Extra verbose
)
|
91 |
+ |
|
92 |
+// Provide a summary at the end |
|
93 |
+func (l *Logger) Summary() { |
|
94 |
+ l.Notice("summary", "\nSummary of diagnostics execution:\n") |
|
95 |
+ if l.warningsSeen > 0 { |
|
96 |
+ l.Noticem("sumWarn", Msg{"tmpl": "Warnings seen: {{.num}}", "num": l.warningsSeen}) |
|
97 |
+ } |
|
98 |
+ if l.errorsSeen > 0 { |
|
99 |
+ l.Noticem("sumErr", Msg{"tmpl": "Errors seen: {{.num}}", "num": l.errorsSeen}) |
|
100 |
+ } |
|
101 |
+ if l.warningsSeen == 0 && l.errorsSeen == 0 { |
|
102 |
+ l.Notice("sumNone", "Completed with no errors or warnings seen.") |
|
103 |
+ } |
|
104 |
+} |
|
105 |
+ |
|
106 |
+func (l *Logger) Log(level Level, id string, msg Msg) { |
|
107 |
+ if level.Level > l.level.Level { |
|
108 |
+ return |
|
109 |
+ } |
|
110 |
+ msg["id"] = id // TODO: use to retrieve template from elsewhere |
|
111 |
+ // if given a template, convert it to text |
|
112 |
+ if tmpl, exists := msg["tmpl"]; exists { |
|
113 |
+ var buff bytes.Buffer |
|
114 |
+ if tmplString, assertion := tmpl.(string); !assertion { |
|
115 |
+ msg["templateErr"] = fmt.Sprintf("Invalid template type: %T", tmpl) |
|
116 |
+ msg["templateId"] = id |
|
117 |
+ msg["id"] = "tmplErr" |
|
118 |
+ } else { |
|
119 |
+ parsedTmpl, err := template.New(id).Parse(tmplString) |
|
120 |
+ if err != nil { |
|
121 |
+ msg["templateErr"] = err.Error() |
|
122 |
+ msg["templateId"] = id |
|
123 |
+ msg["id"] = "tmplErr" |
|
124 |
+ } else if err = parsedTmpl.Execute(&buff, msg); err != nil { |
|
125 |
+ msg["templateErr"] = err.Error() |
|
126 |
+ msg["templateId"] = id |
|
127 |
+ msg["id"] = "tmplErr" |
|
128 |
+ } else { |
|
129 |
+ msg["text"] = buff.String() |
|
130 |
+ delete(msg, "tmpl") |
|
131 |
+ } |
|
132 |
+ } |
|
133 |
+ } |
|
134 |
+ if level.Level == ErrorLevel.Level { |
|
135 |
+ l.errorsSeen += 1 |
|
136 |
+ } else if level.Level == WarnLevel.Level { |
|
137 |
+ l.warningsSeen += 1 |
|
138 |
+ } |
|
139 |
+ l.logger.Write(level, msg) |
|
140 |
+} |
|
141 |
+ |
|
142 |
// Convenience functions
// Each severity has three forms: X(id, text) logs a plain string,
// Xf(id, format, args...) logs a Sprintf-formatted string, and
// Xm(id, msg) logs a structured Msg (which may carry a "tmpl" template).
func (l *Logger) Error(id string, text string) {
	l.Log(ErrorLevel, id, Msg{"text": text})
}
func (l *Logger) Errorf(id string, msg string, a ...interface{}) {
	l.Error(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Errorm(id string, msg Msg) {
	l.Log(ErrorLevel, id, msg)
}
func (l *Logger) Warn(id string, text string) {
	l.Log(WarnLevel, id, Msg{"text": text})
}
func (l *Logger) Warnf(id string, msg string, a ...interface{}) {
	l.Warn(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Warnm(id string, msg Msg) {
	l.Log(WarnLevel, id, msg)
}
func (l *Logger) Info(id string, text string) {
	l.Log(InfoLevel, id, Msg{"text": text})
}
func (l *Logger) Infof(id string, msg string, a ...interface{}) {
	l.Info(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Infom(id string, msg Msg) {
	l.Log(InfoLevel, id, msg)
}
func (l *Logger) Notice(id string, text string) {
	l.Log(NoticeLevel, id, Msg{"text": text})
}
func (l *Logger) Noticef(id string, msg string, a ...interface{}) {
	l.Notice(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Noticem(id string, msg Msg) {
	l.Log(NoticeLevel, id, msg)
}
func (l *Logger) Debug(id string, text string) {
	l.Log(DebugLevel, id, Msg{"text": text})
}
func (l *Logger) Debugf(id string, msg string, a ...interface{}) {
	l.Debug(id, fmt.Sprintf(msg, a...))
}
func (l *Logger) Debugm(id string, msg Msg) {
	l.Log(DebugLevel, id, msg)
}
|
188 |
+ |
|
189 |
// Finish tells the format backend to emit any closing output
// (e.g. closing the JSON array).
func (l *Logger) Finish() {
	l.logger.Finish()
}
|
192 |
+ |
|
193 |
// ErrorsSeen reports whether any error-level messages have been logged.
func (l *Logger) ErrorsSeen() bool {
	return l.errorsSeen > 0
}
|
196 |
+ |
|
197 |
// LimitLines truncates msg to at most n lines; when more are present, the
// n-th line and everything after it are replaced by "[...]".
func LimitLines(msg string, n int) string {
	lines := strings.SplitN(msg, "\n", n+1)
	if len(lines) > n { // SplitN yields at most n+1 pieces, so this means overflow
		lines[n] = "[...]"
	}
	return strings.Join(lines, "\n")
}
0 | 205 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,53 @@ |
0 |
+package log |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ ct "github.com/daviddengcn/go-colortext" |
|
5 |
+ "github.com/docker/docker/pkg/term" |
|
6 |
+ "io" |
|
7 |
+ "os" |
|
8 |
+ "strings" |
|
9 |
+) |
|
10 |
+ |
|
11 |
// textLogger renders log messages as human-readable text,
// colorized when the destination is a terminal.
type textLogger struct {
	out         io.Writer
	ttyOutput   bool // usually want color; but do not output colors to non-tty
	lastNewline bool // keep track of newline separation
}

// newTextLogger builds a textLogger for the given writer, enabling
// colored output only when the writer is an actual terminal.
func newTextLogger(out io.Writer) *textLogger {
	logger := &textLogger{out: out, lastNewline: true}

	if IsTerminal(out) {
		// only want color sequences to humans, not redirected output (logs, "less", etc.)
		logger.ttyOutput = true
	}
	return logger
}
|
26 |
+ |
|
27 |
+// cribbed a la "github.com/openshift/origin/pkg/cmd/util" |
|
28 |
+func IsTerminal(w io.Writer) bool { |
|
29 |
+ file, ok := w.(*os.File) |
|
30 |
+ return ok && term.IsTerminal(file.Fd()) |
|
31 |
+} |
|
32 |
+ |
|
33 |
// Write renders one log message as (optionally colorized) text. Multi-line
// messages are separated from neighboring output by blank lines and their
// continuation lines are indented under the level prefix; the lastNewline
// flag tracks whether the previous message already ended in a blank line.
func (t *textLogger) Write(l Level, msg Msg) {
	if t.ttyOutput {
		ct.ChangeColor(l.Color, l.Bright, ct.None, false)
	}
	// only the "text" entry of the message map is rendered here
	text := strings.TrimSpace(fmt.Sprintf("%v", msg["text"]))
	if strings.Contains(text, "\n") { // separate multiline comments with newlines
		if !t.lastNewline {
			fmt.Fprintln(t.out) // separate from previous one-line log msg
		}
		text = text + "\n" // leave a blank line after the multi-line message
		t.lastNewline = true
	} else {
		t.lastNewline = false
	}
	fmt.Fprintln(t.out, l.Prefix+strings.Replace(text, "\n", "\n ", -1))
	if t.ttyOutput {
		ct.ResetColor()
	}
}

// Finish is a no-op: text output is written immediately, nothing is buffered.
func (t *textLogger) Finish() {}
0 | 53 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,19 @@ |
0 |
+package log |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "fmt" |
|
4 |
+ "gopkg.in/yaml.v2" |
|
5 |
+ "io" |
|
6 |
+) |
|
7 |
+ |
|
8 |
// yamlLogger writes each log message as its own "---"-separated YAML document.
type yamlLogger struct {
	out        io.Writer
	logStarted bool // NOTE(review): never read or set in this file — confirm whether it is needed
}

// Write adds the level name to the message map and emits it as a YAML document.
func (y *yamlLogger) Write(l Level, msg Msg) {
	// NOTE(review): this mutates the caller's Msg map — confirm callers do not reuse it
	msg["level"] = l.Name
	// NOTE(review): marshal errors are silently dropped — TODO confirm acceptable
	b, _ := yaml.Marshal(&msg)
	fmt.Fprintln(y.out, "---\n"+string(b))
}

// Finish is a no-op: YAML documents are written immediately, nothing is buffered.
func (y *yamlLogger) Finish() {}
0 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,129 @@ |
0 |
+package run |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/openshift/origin/pkg/cmd/experimental/diagnostics/options" |
|
4 |
+ "github.com/openshift/origin/pkg/cmd/server/start" |
|
5 |
+ "github.com/openshift/origin/pkg/diagnostics/client" |
|
6 |
+ "github.com/openshift/origin/pkg/diagnostics/discovery" |
|
7 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
8 |
+ "github.com/openshift/origin/pkg/diagnostics/systemd" |
|
9 |
+ "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
10 |
+ "os" |
|
11 |
+ "strings" |
|
12 |
+) |
|
13 |
+ |
|
14 |
// Diagnose is the top-level entry point for running diagnostics: it runs
// discovery against the current host/configuration, executes the relevant
// diagnostics (all available, or only those the user named as "area.name"),
// prints a summary, and exits with status 255 if any diagnostic logged an error.
func Diagnose(opts *options.AllDiagnosticsOptions) {
	// start output to a log
	dopts := opts.DiagOptions
	// NOTE(review): the error from NewLogger is discarded, so an invalid
	// level/format/output option would go unnoticed here — TODO confirm intended.
	logger, _ := log.NewLogger(dopts.DiagLevel, dopts.DiagFormat, dopts.Output.Get())
	// start discovery
	if env := RunDiscovery(opts, logger); env != nil { // discovery result can veto continuing
		// map of area name ("client", "systemd") to that area's available diagnostics
		allDiags := make(map[string]map[string]diagnostic.Diagnostic)
		// now we will figure out what diagnostics to run based on discovery.
		for area := range env.WillCheck {
			switch area {
			case discovery.ClientTarget:
				allDiags["client"] = client.Diagnostics
			case discovery.MasterTarget, discovery.NodeTarget:
				allDiags["systemd"] = systemd.Diagnostics
			}
		}
		if list := opts.DiagOptions.Diagnostics; len(*list) > 0 {
			// just run a specific (set of) diagnostic(s), named "area.name"
			for _, arg := range *list {
				parts := strings.SplitN(arg, ".", 2)
				if len(parts) < 2 { // no "." separator, cannot be a valid name
					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
					continue
				}
				area, name := parts[0], parts[1]
				if diagnostics, exists := allDiags[area]; !exists {
					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
				} else if diag, exists := diagnostics[name]; !exists {
					env.Log.Noticef("noDiag", `There is no such diagnostic "%s"`, arg)
				} else {
					RunDiagnostic(area, name, diag, env)
				}
			}
		} else {
			// TODO: run all of these in parallel but ensure sane output
			for area, diagnostics := range allDiags {
				for name, diag := range diagnostics {
					RunDiagnostic(area, name, diag, env)
				}
			}
		}
	}
	logger.Summary()
	logger.Finish()
	// non-zero exit signals that at least one diagnostic reported an error
	if logger.ErrorsSeen() {
		os.Exit(255)
	}
}
|
62 |
+ |
|
63 |
// ----------------------------------------------------------
// Examine system and return findings in an Environment
//
// RunDiscovery inspects the host for OpenShift client/master/node
// configuration, guided by which option sets are present, and records the
// results in a discovery.Environment. It returns nil — vetoing any further
// diagnostics — when no OpenShift configuration can be found at all.
func RunDiscovery(adOpts *options.AllDiagnosticsOptions, logger *log.Logger) *discovery.Environment {
	logger.Notice("discBegin", "Beginning discovery of environment")
	env := discovery.NewEnvironment(adOpts, logger)
	env.DiscoverOperatingSystem()
	// systemd units are only relevant when diagnosing a master or node host
	if adOpts.MasterDiagOptions != nil || adOpts.NodeDiagOptions != nil {
		env.DiscoverSystemd()
	}
	if mdOpts := adOpts.MasterDiagOptions; mdOpts != nil {
		if mdOpts.MasterStartOptions == nil {
			mdOpts.MasterStartOptions = &start.MasterOptions{ConfigFile: adOpts.MasterConfigPath}
			// leaving MasterArgs nil signals it has to be a master config file or nothing.
		} else if adOpts.MasterConfigPath != "" {
			// an explicitly specified config path overrides existing start options
			mdOpts.MasterStartOptions.ConfigFile = adOpts.MasterConfigPath
		}
		env.DiscoverMaster()
	}
	if ndOpts := adOpts.NodeDiagOptions; ndOpts != nil {
		if ndOpts.NodeStartOptions == nil {
			ndOpts.NodeStartOptions = &start.NodeOptions{ConfigFile: adOpts.NodeConfigPath}
			// no NodeArgs signals it has to be a node config file or nothing.
		} else if adOpts.NodeConfigPath != "" {
			ndOpts.NodeStartOptions.ConfigFile = adOpts.NodeConfigPath
		}
		env.DiscoverNode()
	}
	if cdOpts := adOpts.ClientDiagOptions; cdOpts != nil {
		env.DiscoverClient()
		env.ReadClientConfigFiles() // so user knows where config is coming from (or not)
		env.ConfigClient()
	}
	// if discovery turned up nothing checkable at all, veto running diagnostics
	checkAny := false
	for _, check := range env.WillCheck {
		checkAny = checkAny || check
	}
	if !checkAny {
		logger.Error("discNoChecks", "Cannot find any OpenShift configuration. Please specify which component or configuration you wish to troubleshoot.")
		return nil
	}
	return env
}
|
105 |
+ |
|
106 |
// RunDiagnostic executes one diagnostic, first consulting its optional
// Condition to decide whether it should be skipped (logging why, when a
// reason is given). Panics raised by a diagnostic are recovered and logged
// so the remaining diagnostics can still run.
func RunDiagnostic(area string, name string, diag diagnostic.Diagnostic, env *discovery.Environment) {
	defer func() {
		// recover from diagnostics that panic so others can still run
		if r := recover(); r != nil {
			// NOTE(review): %+v prints the panic value, not an actual stack trace
			env.Log.Errorf("diagPanic", "Diagnostic '%s' crashed; this is usually a bug in either diagnostics or OpenShift. Stack trace:\n%+v", name, r)
		}
	}()
	if diag.Condition != nil {
		if skip, reason := diag.Condition(env); skip {
			if reason == "" {
				env.Log.Noticem("diagSkip", log.Msg{"area": area, "name": name, "diag": diag.Description,
					"tmpl": "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}"})
			} else {
				env.Log.Noticem("diagSkip", log.Msg{"area": area, "name": name, "diag": diag.Description, "reason": reason,
					"tmpl": "Skipping diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}\nBecause: {{.reason}}"})
			}
			return
		}
	}
	env.Log.Noticem("diagRun", log.Msg{"area": area, "name": name, "diag": diag.Description,
		"tmpl": "Running diagnostic: {{.area}}.{{.name}}\nDescription: {{.diag}}"})
	diag.Run(env)
}
0 | 129 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,514 @@ |
0 |
+package systemd |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "bufio" |
|
4 |
+ "encoding/json" |
|
5 |
+ "fmt" |
|
6 |
+ "github.com/openshift/origin/pkg/diagnostics/discovery" |
|
7 |
+ "github.com/openshift/origin/pkg/diagnostics/log" |
|
8 |
+ "github.com/openshift/origin/pkg/diagnostics/types" |
|
9 |
+ "github.com/openshift/origin/pkg/diagnostics/types/diagnostic" |
|
10 |
+ "io" |
|
11 |
+ "os/exec" |
|
12 |
+ "regexp" |
|
13 |
+) |
|
14 |
+ |
|
15 |
// logEntry is the subset of a journald JSON record (journalctl --output=json)
// that log analysis examines.
type logEntry struct {
	// Message holds the log text, taken from journald's MESSAGE field.
	// The explicit tag makes the mapping deliberate instead of relying on
	// encoding/json's case-insensitive fallback from "MESSAGE" to "Message".
	// I feel certain we will want more fields at some point
	Message string `json:"MESSAGE"`
}
18 |
+ |
|
19 |
// logMatcher is a regex for scanning log messages and interpreting them when found.
type logMatcher struct {
	Regexp         *regexp.Regexp
	Level          log.Level
	Id             string
	Interpretation string // log with above level+id if it's simple
	KeepAfterMatch bool   // usually note only first matched entry, ignore rest
	// Interpret, when non-nil, runs custom logic on a match instead of
	// logging Interpretation; its return value replaces KeepAfterMatch.
	Interpret func(
		env *discovery.Environment,
		entry *logEntry,
		matches []string, // capture groups from Regexp
	) bool // KeepAfterMatch?
}
|
31 |
+ |
|
32 |
// unitSpec describes a systemd unit whose journal we know how to analyze:
// how to recognize the unit's most recent startup message, and which
// suspect log patterns to look for in entries since that startup.
type unitSpec struct {
	Name        string
	StartMatch  *regexp.Regexp // regex to look for in log messages indicating startup
	LogMatchers []logMatcher   // suspect log patterns to check for - checked in order
}
|
37 |
+ |
|
38 |
+// |
|
39 |
+// -------- Things that feed into the diagnostics definitions ----------- |
|
40 |
+// Search for Diagnostics for the actual diagnostics. |
|
41 |
+ |
|
42 |
+// Reusable log matchers: |
|
43 |
+var badImageTemplate = logMatcher{ |
|
44 |
+ Regexp: regexp.MustCompile(`Unable to find an image for .* due to an error processing the format: %!v\\(MISSING\\)`), |
|
45 |
+ Level: log.InfoLevel, |
|
46 |
+ Interpretation: ` |
|
47 |
+This error indicates openshift was given the flag --images including an invalid format variable. |
|
48 |
+Valid formats can include (literally) ${component} and ${version}. |
|
49 |
+This could be a typo or you might be intending to hardcode something, |
|
50 |
+such as a version which should be specified as e.g. v3.0, not ${v3.0}. |
|
51 |
+Note that the --images flag may be supplied via the OpenShift master, |
|
52 |
+node, or "openshift ex registry/router" invocations and should usually |
|
53 |
+be the same for each.`, |
|
54 |
+} |
|
55 |
+ |
|
56 |
+// captures for logMatcher Interpret functions to store state between matches |
|
57 |
+var tlsClientErrorSeen map[string]bool |
|
58 |
+ |
|
59 |
+// Specify what units we can check and what to look for and say about it |
|
60 |
+var unitLogSpecs = []*unitSpec{ |
|
61 |
+ { |
|
62 |
+ Name: "openshift-master", |
|
63 |
+ StartMatch: regexp.MustCompile("Starting an OpenShift master"), |
|
64 |
+ LogMatchers: []logMatcher{ |
|
65 |
+ badImageTemplate, |
|
66 |
+ { |
|
67 |
+ Regexp: regexp.MustCompile("Unable to decode an event from the watch stream: local error: unexpected message"), |
|
68 |
+ Level: log.InfoLevel, |
|
69 |
+ Id: "sdLogOMIgnore", |
|
70 |
+ Interpretation: "You can safely ignore this message.", |
|
71 |
+ }, |
|
72 |
+ { |
|
73 |
+ Regexp: regexp.MustCompile("HTTP probe error: Get .*/healthz: dial tcp .*:10250: connection refused"), |
|
74 |
+ Level: log.InfoLevel, |
|
75 |
+ Id: "sdLogOMhzRef", |
|
76 |
+ Interpretation: ` |
|
77 |
+The OpenShift master does a health check on nodes that are defined in |
|
78 |
+its records, and this is the result when the node is not available yet. |
|
79 |
+Since the master records are typically created before the node is |
|
80 |
+available, this is not usually a problem, unless it continues in the |
|
81 |
+logs after the node is actually available.`, |
|
82 |
+ }, |
|
83 |
+ { |
|
84 |
+ // TODO: don't rely on ipv4 format, should be ipv6 "soon" |
|
85 |
+ Regexp: regexp.MustCompile("http: TLS handshake error from ([\\d.]+):\\d+: remote error: bad certificate"), |
|
86 |
+ Level: log.WarnLevel, |
|
87 |
+ Interpret: func(env *discovery.Environment, entry *logEntry, matches []string) bool { |
|
88 |
+ client := matches[1] |
|
89 |
+ prelude := fmt.Sprintf("Found 'openshift-master' journald log message:\n %s\n", entry.Message) |
|
90 |
+ if tlsClientErrorSeen == nil { // first time this message was seen |
|
91 |
+ tlsClientErrorSeen = map[string]bool{client: true} |
|
92 |
+ // TODO: too generic, adjust message depending on subnet of the "from" address |
|
93 |
+ env.Log.Warnm("sdLogOMreBadCert", log.Msg{"client": client, "text": prelude + ` |
|
94 |
+This error indicates that a client attempted to connect to the master |
|
95 |
+HTTPS API server but broke off the connection because the master's |
|
96 |
+certificate is not validated by a cerificate authority (CA) acceptable |
|
97 |
+to the client. There are a number of ways this can occur, some more |
|
98 |
+problematic than others. |
|
99 |
+ |
|
100 |
+At this time, the OpenShift master certificate is signed by a private CA |
|
101 |
+(created the first time the master runs) and clients should have a copy of |
|
102 |
+that CA certificate in order to validate connections to the master. Most |
|
103 |
+likely, either: |
|
104 |
+1. the master has generated a new CA (after the administrator deleted |
|
105 |
+ the old one) and the client has a copy of the old CA cert, or |
|
106 |
+2. the client hasn't been configured with a private CA at all (or the |
|
107 |
+ wrong one), or |
|
108 |
+3. the client is attempting to reach the master at a URL that isn't |
|
109 |
+ covered by the master's server certificate, e.g. a public-facing |
|
110 |
+ name or IP that isn't known to the master automatically; this may |
|
111 |
+ need to be specified with the --public-master flag on the master |
|
112 |
+ in order to generate a new server certificate including it. |
|
113 |
+ |
|
114 |
+Clients of the master may include users, nodes, and infrastructure |
|
115 |
+components running as containers. Check the "from" IP address in the |
|
116 |
+log message: |
|
117 |
+* If it is from a SDN IP, it is likely from an infrastructure |
|
118 |
+ component. Check pod logs and recreate it with the correct CA cert. |
|
119 |
+ Routers and registries won't work properly with the wrong CA. |
|
120 |
+* If it is from a node IP, the client is likely a node. Check the |
|
121 |
+ openshift-node and openshift-sdn-node logs and reconfigure with the |
|
122 |
+ correct CA cert. Nodes will be unable to create pods until this is |
|
123 |
+ corrected. |
|
124 |
+* If it is from an external IP, it is likely from a user (CLI, browser, |
|
125 |
+ etc.). osc and openshift clients should be configured with the correct |
|
126 |
+ CA cert; browsers can also add CA certs but it is usually easier |
|
127 |
+ to just have them accept the server certificate on the first visit |
|
128 |
+ (so this message may simply indicate that the master generated a new |
|
129 |
+ server certificate, e.g. to add a different --public-master, and a |
|
130 |
+ browser hasn't accepted it yet and is still attempting API calls; |
|
131 |
+ try logging out of the console and back in again).`}) |
|
132 |
+ } else if !tlsClientErrorSeen[client] { |
|
133 |
+ tlsClientErrorSeen[client] = true |
|
134 |
+ env.Log.Warnm("sdLogOMreBadCert", log.Msg{"client": client, "text": prelude + |
|
135 |
+ `This message was diagnosed above, but for a different client address.`}) |
|
136 |
+ } // else, it's a repeat, don't mention it |
|
137 |
+ return true // show once for every client failing to connect, not just the first |
|
138 |
+ }, |
|
139 |
+ }, |
|
140 |
+ { |
|
141 |
+ // user &{system:anonymous [system:unauthenticated]} -> /api/v1beta1/services?namespace=" |
|
142 |
+ Regexp: regexp.MustCompile("system:anonymous\\W*system:unauthenticated\\W*/api/v1beta1/services\\?namespace="), |
|
143 |
+ Level: log.WarnLevel, |
|
144 |
+ Id: "sdLogOMunauthNode", |
|
145 |
+ Interpretation: ` |
|
146 |
+This indicates the OpenShift API server (master) received an unscoped |
|
147 |
+request to get Services. Requests like this probably come from an |
|
148 |
+OpenShift node trying to discover where it should proxy services. |
|
149 |
+ |
|
150 |
+However, the request was unauthenticated, so it was denied. The node |
|
151 |
+either did not offer a client certificate for credential, or offered an |
|
152 |
+invalid one (not signed by the certificate authority the master uses). |
|
153 |
+The node will not be able to function without this access. |
|
154 |
+ |
|
155 |
+Unfortunately, this message does not tell us *which* node is the |
|
156 |
+problem. But running diagnostics on your node hosts should find a log |
|
157 |
+message for any node with this problem. |
|
158 |
+`, |
|
159 |
+ }, |
|
160 |
+ }, |
|
161 |
+ }, |
|
162 |
+ { |
|
163 |
+ Name: "openshift-sdn-master", |
|
164 |
+ StartMatch: regexp.MustCompile("Starting OpenShift SDN Master"), |
|
165 |
+ LogMatchers: []logMatcher{}, |
|
166 |
+ }, |
|
167 |
+ { |
|
168 |
+ Name: "openshift-node", |
|
169 |
+ StartMatch: regexp.MustCompile("Starting an OpenShift node"), |
|
170 |
+ LogMatchers: []logMatcher{ |
|
171 |
+ badImageTemplate, |
|
172 |
+ { |
|
173 |
+ Regexp: regexp.MustCompile(`error updating node status, will retry:.*system:(\S+) cannot get on minions with name "(\S+)" in default|Failed to list .*Forbidden: "\S+" system:node-\S+ cannot list on (pods|services) in`), |
|
174 |
+ Level: log.ErrorLevel, |
|
175 |
+ Id: "sdLogONnodePerm", |
|
176 |
+ Interpretation: ` |
|
177 |
+openshift-node lacks the permission to update the node's status or request |
|
178 |
+its responsibilities from the OpenShift master API. This host will not |
|
179 |
+function as a node until this is resolved. Pods scheduled for this node |
|
180 |
+will remain in pending or unknown state forever. |
|
181 |
+ |
|
182 |
+This probably indicates a problem with policy as node credentials in beta3 |
|
183 |
+allow access to anything (later, they will be constrained only to pods |
|
184 |
+that belong to them). This message indicates that the node credentials |
|
185 |
+are authenticated, but not authorized for the necessary access. |
|
186 |
+ |
|
187 |
+One way to encounter this is to start the master with data from an older |
|
188 |
+installation (e.g. beta2) in etcd. The default startup will not update |
|
189 |
+existing policy to allow node access as they would have if starting with |
|
190 |
+an empty etcd. In this case, the following command (as admin): |
|
191 |
+ |
|
192 |
+ osc get rolebindings -n master |
|
193 |
+ |
|
194 |
+... should show group system:nodes has the master/system:component role. |
|
195 |
+If that is missing, you may wish to rewrite the bootstrap policy with: |
|
196 |
+ |
|
197 |
+ POLICY=/var/lib/openshift/openshift.local.policy/policy.json |
|
198 |
+ CONF=/etc/openshift/master.yaml |
|
199 |
+ openshift admin overwrite-policy --filename=$POLICY --master-config=$CONF |
|
200 |
+ |
|
201 |
+If that is not the problem, then it may be that access controls on nodes |
|
202 |
+have been put in place and are blocking this request; check the error |
|
203 |
+message to see whether the node is attempting to use the wrong node name. |
|
204 |
+`, |
|
205 |
+ }, |
|
206 |
+ { |
|
207 |
+ Regexp: regexp.MustCompile("Unable to load services: Get (http\\S+/api/v1beta1/services\\?namespace=): (.+)"), // e.g. x509: certificate signed by unknown authority |
|
208 |
+ Level: log.ErrorLevel, |
|
209 |
+ Id: "sdLogONconnMaster", |
|
210 |
+ Interpretation: ` |
|
211 |
+openshift-node could not connect to the OpenShift master API in order |
|
212 |
+to determine its responsibilities. This host will not function as a node |
|
213 |
+until this is resolved. Pods scheduled for this node will remain in |
|
214 |
+pending or unknown state forever.`, |
|
215 |
+ }, |
|
216 |
+ { |
|
217 |
+ Regexp: regexp.MustCompile(`Unable to load services: request.*403 Forbidden: Forbidden: "/api/v1beta1/services\?namespace=" denied by default`), |
|
218 |
+ Level: log.ErrorLevel, |
|
219 |
+ Id: "sdLogONMasterForbids", |
|
220 |
+ Interpretation: ` |
|
221 |
+openshift-node could not connect to the OpenShift master API to determine |
|
222 |
+its responsibilities because it lacks the proper credentials. Nodes |
|
223 |
+should specify a client certificate in order to identify themselves to |
|
224 |
+the master. This message typically means that either no client key/cert |
|
225 |
+was supplied, or it is not validated by the certificate authority (CA) |
|
226 |
+the master uses. You should supply a correct client key and certificate |
|
227 |
+to the .kubeconfig specified in /etc/sysconfig/openshift-node |
|
228 |
+ |
|
229 |
+This host will not function as a node until this is resolved. Pods |
|
230 |
+scheduled for this node will remain in pending or unknown state forever.`, |
|
231 |
+ }, |
|
232 |
+ }, |
|
233 |
+ }, |
|
234 |
+ { |
|
235 |
+ Name: "openshift-sdn-node", |
|
236 |
+ StartMatch: regexp.MustCompile("Starting OpenShift SDN node"), |
|
237 |
+ LogMatchers: []logMatcher{ |
|
238 |
+ { |
|
239 |
+ Regexp: regexp.MustCompile("Could not find an allocated subnet for this minion.*Waiting.."), |
|
240 |
+ Level: log.WarnLevel, |
|
241 |
+ Id: "sdLogOSNnoSubnet", |
|
242 |
+ Interpretation: ` |
|
243 |
+This warning occurs when openshift-sdn-node is trying to request the |
|
244 |
+SDN subnet it should be configured with according to openshift-sdn-master, |
|
245 |
+but either can't connect to it ("All the given peers are not reachable") |
|
246 |
+or has not yet been assigned a subnet ("Key not found"). |
|
247 |
+ |
|
248 |
+This can just be a matter of waiting for the master to become fully |
|
249 |
+available and define a record for the node (aka "minion") to use, |
|
250 |
+and openshift-sdn-node will wait until that occurs, so the presence |
|
251 |
+of this message in the node log isn't necessarily a problem as |
|
252 |
+long as the SDN is actually working, but this message may help indicate |
|
253 |
+the problem if it is not working. |
|
254 |
+ |
|
255 |
+If the master is available and this node's record is defined and this |
|
256 |
+message persists, then it may be a sign of a different misconfiguration. |
|
257 |
+Unfortunately the message is not specific about why the connection failed. |
|
258 |
+Check MASTER_URL in /etc/sysconfig/openshift-sdn-node: |
|
259 |
+ * Is the protocol https? It should be http. |
|
260 |
+ * Can you reach the address and port from the node using curl? |
|
261 |
+ ("404 page not found" is correct response)`, |
|
262 |
+ }, |
|
263 |
+ }, |
|
264 |
+ }, |
|
265 |
+ { |
|
266 |
+ Name: "docker", |
|
267 |
+ StartMatch: regexp.MustCompile(`Starting Docker Application Container Engine.`), // RHEL Docker at least |
|
268 |
+ LogMatchers: []logMatcher{ |
|
269 |
+ { |
|
270 |
+ Regexp: regexp.MustCompile(`Usage: docker \\[OPTIONS\\] COMMAND`), |
|
271 |
+ Level: log.ErrorLevel, |
|
272 |
+ Id: "sdLogDbadOpt", |
|
273 |
+ Interpretation: ` |
|
274 |
+This indicates that docker failed to parse its command line |
|
275 |
+successfully, so it just printed a standard usage message and exited. |
|
276 |
+Its command line is built from variables in /etc/sysconfig/docker |
|
277 |
+(which may be overridden by variables in /etc/sysconfig/openshift-sdn-node) |
|
278 |
+so check there for problems. |
|
279 |
+ |
|
280 |
+The OpenShift node will not work on this host until this is resolved.`, |
|
281 |
+ }, |
|
282 |
+ { |
|
283 |
+ Regexp: regexp.MustCompile(`^Unable to open the database file: unable to open database file$`), |
|
284 |
+ Level: log.ErrorLevel, |
|
285 |
+ Id: "sdLogDopenDB", |
|
286 |
+ Interpretation: ` |
|
287 |
+This indicates that docker failed to record its state to its database. |
|
288 |
+The most likely reason is that it is out of disk space. It is also |
|
289 |
+possible for other device or permissions problems to be at fault. |
|
290 |
+ |
|
291 |
+Sometimes this is due to excess completed containers not being cleaned |
|
292 |
+up. You can delete all completed containers with this command (running |
|
293 |
+containers will not be deleted): |
|
294 |
+ |
|
295 |
+ # docker rm $(docker ps -qa) |
|
296 |
+ |
|
297 |
+Whatever the reason, docker will not function in this state. |
|
298 |
+The OpenShift node will not work on this host until this is resolved.`, |
|
299 |
+ }, |
|
300 |
+ { |
|
301 |
+ Regexp: regexp.MustCompile(`no space left on device$`), |
|
302 |
+ Level: log.ErrorLevel, |
|
303 |
+ Id: "sdLogDfull", |
|
304 |
+ Interpretation: ` |
|
305 |
+This indicates that docker has run out of space for container volumes |
|
306 |
+or metadata (by default, stored in /var/lib/docker, but configurable). |
|
307 |
+ |
|
308 |
+docker will not function in this state. It requires that disk space be |
|
309 |
+added to the relevant filesystem or files deleted to make space. |
|
310 |
+Sometimes this is due to excess completed containers not being cleaned |
|
311 |
+up. You can delete all completed containers with this command (running |
|
312 |
+containers will not be deleted): |
|
313 |
+ |
|
314 |
+ # docker rm $(docker ps -qa) |
|
315 |
+ |
|
316 |
+The OpenShift node will not work on this host until this is resolved.`, |
|
317 |
+ }, |
|
318 |
+ { // generic error seen - do this last |
|
319 |
+ Regexp: regexp.MustCompile(`\\slevel="fatal"\\s`), |
|
320 |
+ Level: log.ErrorLevel, |
|
321 |
+ Id: "sdLogDfatal", |
|
322 |
+ Interpretation: ` |
|
323 |
+This is not a known problem, but it is causing Docker to crash, |
|
324 |
+so the OpenShift node will not work on this host until it is resolved.`, |
|
325 |
+ }, |
|
326 |
+ }, |
|
327 |
+ }, |
|
328 |
+ { |
|
329 |
+ Name: "openvswitch", |
|
330 |
+ StartMatch: regexp.MustCompile("Starting Open vSwitch"), |
|
331 |
+ LogMatchers: []logMatcher{}, |
|
332 |
+ }, |
|
333 |
+} |
|
334 |
+ |
|
335 |
+var systemdRelevant = func(env *discovery.Environment) (skip bool, reason string) { |
|
336 |
+ if !env.HasSystemd { |
|
337 |
+ return true, "systemd is not present on this host" |
|
338 |
+ } |
|
339 |
+ return false, "" |
|
340 |
+} |
|
341 |
+ |
|
342 |
+// |
|
343 |
+// -------- The actual diagnostics definitions ----------- |
|
344 |
+// |
|
345 |
+ |
|
346 |
// Diagnostics holds the systemd-area diagnostics, keyed by diagnostic name.
var Diagnostics = map[string]diagnostic.Diagnostic{

	"AnalyzeLogs": {
		Description: "Check for problems in systemd service logs since each service last started",
		Condition:   systemdRelevant,
		Run: func(env *discovery.Environment) {
			// scan journals only for units discovery found enabled or active
			for _, unit := range unitLogSpecs {
				if svc := env.SystemdUnits[unit.Name]; svc.Enabled || svc.Active {
					env.Log.Infom("sdCheckLogs", log.Msg{"tmpl": "Checking journalctl logs for '{{.name}}' service", "name": unit.Name})
					matchLogsSinceLastStart(unit, env)
				}
			}
		},
	},

	"UnitStatus": {
		Description: "Check status for OpenShift-related systemd units",
		Condition:   systemdRelevant,
		Run: func(env *discovery.Environment) {
			u := env.SystemdUnits
			// pairwise dependency checks between related units
			unitRequiresUnit(env.Log, u["openshift-node"], u["iptables"], `
iptables is used by OpenShift nodes for container networking.
Connections to a container will fail without it.`)
			unitRequiresUnit(env.Log, u["openshift-node"], u["docker"], `OpenShift nodes use Docker to run containers.`)
			unitRequiresUnit(env.Log, u["openshift"], u["docker"], `OpenShift nodes use Docker to run containers.`)
			// node's dependency on openvswitch is a special case.
			// We do not need to enable ovs because openshift-node starts it for us.
			if u["openshift-node"].Active && !u["openvswitch"].Active {
				env.Log.Error("sdUnitSDNreqOVS", `
systemd unit openshift-node is running but openvswitch is not.
Normally openshift-node starts openvswitch once initialized.
It is likely that openvswitch has crashed or been stopped.

The software-defined network (SDN) enables networking between
containers on different nodes. Containers will not be able to
connect to each other without the openvswitch service carrying
this traffic.

An administrator can start openvswitch with:

 # systemctl start openvswitch

To ensure it is not repeatedly failing to run, check the status and logs with:

 # systemctl status openvswitch
 # journalctl -ru openvswitch `)
			}
			// Anything that is enabled but not running deserves notice
			for name, unit := range u {
				if unit.Enabled && !unit.Active {
					env.Log.Errorm("sdUnitInactive", log.Msg{"tmpl": `
The {{.unit}} systemd unit is intended to start at boot but is not currently active.
An administrator can start the {{.unit}} unit with:

 # systemctl start {{.unit}}

To ensure it is not failing to run, check the status and logs with:

 # systemctl status {{.unit}}
 # journalctl -ru {{.unit}}`, "unit": name})
				}
			}
		},
	},
}
|
411 |
+ |
|
412 |
+// |
|
413 |
+// -------- Functions used by the diagnostics ----------- |
|
414 |
+// |
|
415 |
+ |
|
416 |
+func unitRequiresUnit(logger *log.Logger, unit types.SystemdUnit, requires types.SystemdUnit, reason string) { |
|
417 |
+ if (unit.Active || unit.Enabled) && !requires.Exists { |
|
418 |
+ logger.Errorm("sdUnitReqLoaded", log.Msg{"tmpl": ` |
|
419 |
+systemd unit {{.unit}} depends on unit {{.required}}, which is not loaded. |
|
420 |
+{{.reason}} |
|
421 |
+An administrator probably needs to install the {{.required}} unit with: |
|
422 |
+ |
|
423 |
+ # yum install {{.required}} |
|
424 |
+ |
|
425 |
+If it is already installed, you may to reload the definition with: |
|
426 |
+ |
|
427 |
+ # systemctl reload {{.required}} |
|
428 |
+ `, "unit": unit.Name, "required": requires.Name, "reason": reason}) |
|
429 |
+ } else if unit.Active && !requires.Active { |
|
430 |
+ logger.Errorm("sdUnitReqActive", log.Msg{"tmpl": ` |
|
431 |
+systemd unit {{.unit}} is running but {{.required}} is not. |
|
432 |
+{{.reason}} |
|
433 |
+An administrator can start the {{.required}} unit with: |
|
434 |
+ |
|
435 |
+ # systemctl start {{.required}} |
|
436 |
+ |
|
437 |
+To ensure it is not failing to run, check the status and logs with: |
|
438 |
+ |
|
439 |
+ # systemctl status {{.required}} |
|
440 |
+ # journalctl -ru {{.required}} |
|
441 |
+ `, "unit": unit.Name, "required": requires.Name, "reason": reason}) |
|
442 |
+ } else if unit.Enabled && !requires.Enabled { |
|
443 |
+ logger.Warnm("sdUnitReqEnabled", log.Msg{"tmpl": ` |
|
444 |
+systemd unit {{.unit}} is enabled to run automatically at boot, but {{.required}} is not. |
|
445 |
+{{.reason}} |
|
446 |
+An administrator can enable the {{.required}} unit with: |
|
447 |
+ |
|
448 |
+ # systemctl enable {{.required}} |
|
449 |
+ `, "unit": unit.Name, "required": requires.Name, "reason": reason}) |
|
450 |
+ } |
|
451 |
+} |
|
452 |
+ |
|
453 |
+func matchLogsSinceLastStart(unit *unitSpec, env *discovery.Environment) { |
|
454 |
+ cmd := exec.Command("journalctl", "-ru", unit.Name, "--output=json") |
|
455 |
+ // JSON comes out of journalctl one line per record |
|
456 |
+ lineReader, reader, err := func(cmd *exec.Cmd) (*bufio.Scanner, io.ReadCloser, error) { |
|
457 |
+ stdout, err := cmd.StdoutPipe() |
|
458 |
+ if err == nil { |
|
459 |
+ lineReader := bufio.NewScanner(stdout) |
|
460 |
+ if err = cmd.Start(); err == nil { |
|
461 |
+ return lineReader, stdout, nil |
|
462 |
+ } |
|
463 |
+ } |
|
464 |
+ return nil, nil, err |
|
465 |
+ }(cmd) |
|
466 |
+ if err != nil { |
|
467 |
+ env.Log.Errorm("sdLogReadErr", log.Msg{"tmpl": ` |
|
468 |
+Diagnostics failed to query journalctl for the '{{.unit}}' unit logs. |
|
469 |
+This should be very unusual, so please report this error: |
|
470 |
+{{.error}}`, "unit": unit.Name, "error": errStr(err)}) |
|
471 |
+ return |
|
472 |
+ } |
|
473 |
+ defer func() { // close out pipe once done reading |
|
474 |
+ reader.Close() |
|
475 |
+ cmd.Wait() |
|
476 |
+ }() |
|
477 |
+ entryTemplate := logEntry{Message: `json:"MESSAGE"`} |
|
478 |
+ matchCopy := append([]logMatcher(nil), unit.LogMatchers...) // make a copy, will remove matchers after they match something |
|
479 |
+ for lineReader.Scan() { // each log entry is a line |
|
480 |
+ if len(matchCopy) == 0 { // if no rules remain to match |
|
481 |
+ break // don't waste time reading more log entries |
|
482 |
+ } |
|
483 |
+ bytes, entry := lineReader.Bytes(), entryTemplate |
|
484 |
+ if err := json.Unmarshal(bytes, &entry); err != nil { |
|
485 |
+ env.Log.Debugm("sdLogBadJSON", log.Msg{"message": string(bytes), "error": errStr(err), |
|
486 |
+ "tmpl": "Couldn't read the JSON for this log message:\n{{.message}}\nGot error {{.error}}"}) |
|
487 |
+ } else { |
|
488 |
+ if unit.StartMatch.MatchString(entry.Message) { |
|
489 |
+ break // saw the log message where the unit started; done looking. |
|
490 |
+ } |
|
491 |
+ for index, match := range matchCopy { // match log message against provided matchers |
|
492 |
+ if strings := match.Regexp.FindStringSubmatch(entry.Message); strings != nil { |
|
493 |
+ // if matches: print interpretation, remove from matchCopy, and go on to next log entry |
|
494 |
+ keep := match.KeepAfterMatch |
|
495 |
+ if match.Interpret != nil { |
|
496 |
+ keep = match.Interpret(env, &entry, strings) |
|
497 |
+ } else { |
|
498 |
+ prelude := fmt.Sprintf("Found '%s' journald log message:\n %s\n", unit.Name, entry.Message) |
|
499 |
+ env.Log.Log(match.Level, match.Id, log.Msg{"text": prelude + match.Interpretation, "unit": unit.Name, "logMsg": entry.Message}) |
|
500 |
+ } |
|
501 |
+ if !keep { // remove matcher once seen |
|
502 |
+ matchCopy = append(matchCopy[:index], matchCopy[index+1:]...) |
|
503 |
+ } |
|
504 |
+ break |
|
505 |
+ } |
|
506 |
+ } |
|
507 |
+ } |
|
508 |
+ } |
|
509 |
+} |
|
510 |
+ |
|
511 |
+func errStr(err error) string { |
|
512 |
+ return fmt.Sprintf("(%T) %[1]v", err) |
|
513 |
+} |
0 | 514 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,16 @@ |
0 |
+package diagnostic |
|
1 |
+ |
|
2 |
+// This needed to be separate from other types to avoid import cycle |
|
3 |
+// diagnostic -> discovery -> types |
|
4 |
+ |
|
5 |
+import ( |
|
6 |
+ "github.com/openshift/origin/pkg/diagnostics/discovery" |
|
7 |
+) |
|
8 |
+ |
|
9 |
// DiagnosticCondition decides whether a diagnostic should be skipped for
// the discovered environment, optionally supplying the reason why.
type DiagnosticCondition func(env *discovery.Environment) (skip bool, reason string)

// Diagnostic is one runnable troubleshooting check.
type Diagnostic struct {
	Description string              // human-readable summary of what the check does
	Condition   DiagnosticCondition // optional; nil means always run
	Run         func(env *discovery.Environment)
}
0 | 9 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,38 @@ |
0 |
+package types |
|
1 |
+ |
|
2 |
+import "fmt" |
|
3 |
+ |
|
4 |
// Version is a simple X.Y.Z version triple.
type Version struct {
	X, Y, Z int
}

// Eq reports whether the two versions are identical.
func (a Version) Eq(b Version) bool {
	return a == b
}

// Gt reports whether version a is strictly greater than b,
// comparing X, then Y, then Z.
func (a Version) Gt(b Version) bool {
	switch {
	case a.X != b.X:
		return a.X > b.X
	case a.Y != b.Y:
		return a.Y > b.Y
	default:
		return a.Z > b.Z
	}
}

// GoString renders the version as "x.y.z".
// NOTE(review): fmt consults GoString only for %#v; if "x.y.z" is also
// wanted from plain %v, a String() method would be the conventional home.
func (v Version) GoString() string {
	return fmt.Sprintf("%d.%d.%d", v.X, v.Y, v.Z)
}

// NonZero reports whether the version differs from 0.0.0.
func (v Version) NonZero() bool {
	return v != Version{}
}