package network
import (
	"errors"
	"fmt"
	"os"
	"os/signal"
	"strconv"
	"strings"
	"syscall"

	flag "github.com/spf13/pflag"

	kapi "k8s.io/kubernetes/pkg/api"
	kclientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"

	osclient "github.com/openshift/origin/pkg/client"
	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
	"github.com/openshift/origin/pkg/diagnostics/networkpod/util"
	"github.com/openshift/origin/pkg/diagnostics/types"
)
const (
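	// NetworkDiagnosticName is the name of this network diagnostic check.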
NetworkDiagnosticName = "NetworkCheck"
)
// NetworkDiagnostic is a diagnostic that runs a network diagnostic pod and relays the results.
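//
// A minimal usage sketch; kubeClient, osClient, and factory are illustrative
// placeholders for clients built elsewhere, not values this package provides:
//
//	diag := &NetworkDiagnostic{
//		KubeClient: kubeClient,
//		OSClient:   osClient,
//		Factory:    factory,
//		Level:      2,
//	}
//	if ok, err := diag.CanRun(); err != nil {
//		fmt.Println(err)
//	} else if ok {
//		result := diag.Check()
//		_ = result // inspect the DiagnosticResult for errors/warnings
//	}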
type NetworkDiagnostic struct {
	KubeClient          *kclientset.Clientset
	OSClient            *osclient.Client
	ClientFlags         *flag.FlagSet
	Level               int
	Factory             *osclientcmd.Factory
	PreventModification bool
	LogDir              string

	// Fields populated while the diagnostic runs.
	pluginName    string
	nodes         []kapi.Node
	nsName1       string
	nsName2       string
	globalnsName1 string
	globalnsName2 string
	res           types.DiagnosticResult
}
// Name is part of the Diagnostic interface and returns the diagnostic's name.
func (d *NetworkDiagnostic) Name() string {
return NetworkDiagnosticName
}
// Description is part of the Diagnostic interface and provides a user-focused description of what the diagnostic does.
func (d *NetworkDiagnostic) Description() string {
return "Create a pod on all schedulable nodes and run network diagnostics from the application standpoint"
}
// CanRun is part of the Diagnostic interface; it determines if the conditions are right to run this diagnostic.
func (d *NetworkDiagnostic) CanRun() (bool, error) {
if d.PreventModification {
return false, errors.New("running the network diagnostic pod is an API change, which is prevented as you indicated")
} else if d.KubeClient == nil {
return false, errors.New("must have kube client")
} else if d.OSClient == nil {
return false, errors.New("must have openshift client")
} else if _, err := d.getKubeConfig(); err != nil {
return false, err
}
return true, nil
}
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic.
func (d *NetworkDiagnostic) Check() types.DiagnosticResult {
d.res = types.NewDiagnosticResult(NetworkDiagnosticName)
var err error
var ok bool
d.pluginName, ok, err = util.GetOpenShiftNetworkPlugin(d.OSClient)
if err != nil {
d.res.Error("DNet2001", err, fmt.Sprintf("Checking network plugin failed. Error: %s", err))
return d.res
}
if !ok {
d.res.Warn("DNet2002", nil, "Skipping network diagnostics check. Reason: Not using openshift network plugin.")
return d.res
}
d.nodes, err = util.GetSchedulableNodes(d.KubeClient)
if err != nil {
d.res.Error("DNet2003", err, fmt.Sprintf("Fetching schedulable nodes failed. Error: %s", err))
return d.res
}
if len(d.nodes) == 0 {
d.res.Warn("DNet2004", nil, "Skipping network checks. Reason: No schedulable/ready nodes found.")
return d.res
}
if len(d.LogDir) == 0 {
d.LogDir = util.NetworkDiagDefaultLogDir
}
d.runNetworkDiagnostic()
return d.res
}
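// runNetworkDiagnostic runs the check in two phases: a test phase that
// executes the network diagnostic pod on every schedulable node and waits
// for it to finish, then a collection phase that re-runs the pod with a
// long sleep so logs and additional network info can be gathered. The test
// environment is cleaned up on exit and on an interrupt/terminate signal.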
func (d *NetworkDiagnostic) runNetworkDiagnostic() {
	// Clean up the test environment if an interrupt/terminate signal is received while network diagnostics are running
c := make(chan os.Signal, 2)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
go func() {
<-c
d.Cleanup()
}()
	defer d.Cleanup()
// Setup test environment
if err := d.TestSetup(); err != nil {
d.res.Error("DNet2005", err, fmt.Sprintf("Setting up test environment for network diagnostics failed: %v", err))
return
}
	// Cap the log level at 2 (Notice) so the diagnostic pod reports at least
	// a summary; in the diagnostics logger, higher levels produce less output.
	loglevel := d.Level
	if loglevel > 2 {
		loglevel = 2
	}
// TEST Phase: Run network diagnostic pod on all valid nodes in parallel
command := []string{"chroot", util.NetworkDiagContainerMountPath, "openshift", "infra", "network-diagnostic-pod", "-l", strconv.Itoa(loglevel)}
if err := d.runNetworkPod(command); err != nil {
d.res.Error("DNet2006", err, err.Error())
return
}
// Wait for network diagnostic pod completion
if err := d.waitForNetworkPod(d.nsName1, util.NetworkDiagPodNamePrefix, []kapi.PodPhase{kapi.PodSucceeded, kapi.PodFailed}); err != nil {
d.res.Error("DNet2007", err, err.Error())
return
}
// Gather logs from network diagnostic pod on all valid nodes
diagsFailed := false
if err := d.CollectNetworkPodLogs(); err != nil {
d.res.Error("DNet2008", err, err.Error())
diagsFailed = true
}
	// Collection Phase: Run a long-lived (sleeping) network diagnostic pod on all valid nodes so additional info can be gathered
command = []string{"chroot", util.NetworkDiagContainerMountPath, "sleep", "1000"}
if err := d.runNetworkPod(command); err != nil {
d.res.Error("DNet2009", err, err.Error())
return
}
// Wait for network diagnostic pod to start
if err := d.waitForNetworkPod(d.nsName1, util.NetworkDiagPodNamePrefix, []kapi.PodPhase{kapi.PodRunning, kapi.PodFailed, kapi.PodSucceeded}); err != nil {
d.res.Error("DNet2010", err, err.Error())
		// Do not bail out here; collect whatever info is available from all valid nodes
}
if err := d.CollectNetworkInfo(diagsFailed); err != nil {
d.res.Error("DNet2011", err, err.Error())
}
if diagsFailed {
d.res.Info("DNet2012", fmt.Sprintf("Additional info collected under %q for further analysis", d.LogDir))
}
}
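// runNetworkPod creates one network diagnostic pod per schedulable node in
// the test namespace, each running the given command.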
func (d *NetworkDiagnostic) runNetworkPod(command []string) error {
for _, node := range d.nodes {
podName := kapi.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", util.NetworkDiagPodNamePrefix))
pod := GetNetworkDiagnosticsPod(command, podName, node.Name)
_, err := d.KubeClient.Core().Pods(d.nsName1).Create(pod)
if err != nil {
return fmt.Errorf("Creating network diagnostic pod %q on node %q with command %q failed: %v", podName, node.Name, strings.Join(command, " "), err)
}
d.res.Debug("DNet2013", fmt.Sprintf("Created network diagnostic pod %q on node %q with command: %q", podName, node.Name, strings.Join(command, " ")))
}
return nil
}