In the cluster admin diagnostic which scans the registry pod logs, add an
explicit check for an error which indicates the registry cannot write to disk,
which is a strong indicator of an SELinux problem. The fix from the Origin
troubleshooting page is then suggested.
We also watch for a log entry recording a successful write (a completed PUT
request); if one is found *after* the error, we know the problem has since been
fixed, so the diagnostic error is cleared and not reported.
... | ... |
@@ -4,6 +4,7 @@ import ( |
4 | 4 |
"bufio" |
5 | 5 |
"fmt" |
6 | 6 |
"reflect" |
7 |
+ "regexp" |
|
7 | 8 |
"strings" |
8 | 9 |
|
9 | 10 |
kapi "k8s.io/kubernetes/pkg/api" |
... | ... |
@@ -92,6 +93,21 @@ Please examine the log entries to determine if there might be |
92 | 92 |
any related problems: |
93 | 93 |
%s` |
94 | 94 |
|
95 |
+ clRegSelinuxErr = ` |
|
96 |
+The pod logs for the "%s" pod belonging to |
|
97 |
+the "%s" service indicated the registry is unable to write to disk. |
|
98 |
+This may indicate an SELinux denial, or problems with volume |
|
99 |
+ownership/permissions. |
|
100 |
+ |
|
101 |
+For volume permission problems please consult the Persistent Storage section |
|
102 |
+of the Administrator's Guide. |
|
103 |
+ |
|
104 |
+In the case of SELinux this may be resolved on the node by running: |
|
105 |
+ |
|
106 |
+ sudo chcon -R -t svirt_sandbox_file_t [PATH_TO]/openshift.local.volumes |
|
107 |
+ |
|
108 |
+%s` |
|
109 |
+ |
|
95 | 110 |
clRegNoEP = ` |
96 | 111 |
The "%[1]s" service exists with %d associated pod(s), but there |
97 | 112 |
are %d endpoints in the "%[1]s" service. |
... | ... |
@@ -221,8 +237,15 @@ func (d *ClusterRegistry) checkRegistryLogs(pod *kapi.Pod, r types.DiagnosticRes |
221 | 221 |
} |
222 | 222 |
defer readCloser.Close() |
223 | 223 |
|
224 |
+ // Indicator that selinux is blocking the registry from writing to disk: |
|
225 |
+ selinuxErrorRegex, _ := regexp.Compile(".*level=error.*mkdir.*permission denied.*") |
|
226 |
+ // If seen after the above error regex, we know the problem has since been fixed: |
|
227 |
+ selinuxSuccessRegex, _ := regexp.Compile(".*level=info.*response completed.*http.request.method=PUT.*") |
|
228 |
+ |
|
224 | 229 |
clientError := "" |
225 | 230 |
registryError := "" |
231 |
+ selinuxError := "" |
|
232 |
+ |
|
226 | 233 |
scanner := bufio.NewScanner(readCloser) |
227 | 234 |
for scanner.Scan() { |
228 | 235 |
logLine := scanner.Text() |
... | ... |
@@ -230,6 +253,12 @@ func (d *ClusterRegistry) checkRegistryLogs(pod *kapi.Pod, r types.DiagnosticRes |
230 | 230 |
// https://github.com/kubernetes/kubernetes/issues/12447 |
231 | 231 |
if strings.Contains(logLine, `level=error msg="client error:`) { |
232 | 232 |
clientError = logLine // end up showing only the most recent client error |
233 |
+ } else if selinuxErrorRegex.MatchString(logLine) { |
|
234 |
+ selinuxError = logLine |
|
235 |
+ } else if selinuxSuccessRegex.MatchString(logLine) { |
|
236 |
+ // Check for a successful registry push, if this occurs after a selinux error |
|
237 |
+ // we can safely clear it, the problem has already been fixed. |
|
238 |
+ selinuxError = "" |
|
233 | 239 |
} else if strings.Contains(logLine, "level=error msg=") { |
234 | 240 |
registryError += "\n" + logLine // gather generic errors |
235 | 241 |
} |
... | ... |
@@ -237,10 +266,12 @@ func (d *ClusterRegistry) checkRegistryLogs(pod *kapi.Pod, r types.DiagnosticRes |
237 | 237 |
if clientError != "" { |
238 | 238 |
r.Error("DClu1011", nil, fmt.Sprintf(clRegPodConn, pod.ObjectMeta.Name, registryName, clientError)) |
239 | 239 |
} |
240 |
+ if selinuxError != "" { |
|
241 |
+ r.Error("DClu1020", nil, fmt.Sprintf(clRegSelinuxErr, pod.ObjectMeta.Name, registryName, selinuxError)) |
|
242 |
+ } |
|
240 | 243 |
if registryError != "" { |
241 | 244 |
r.Warn("DClu1012", nil, fmt.Sprintf(clRegPodErr, pod.ObjectMeta.Name, registryName, registryError)) |
242 | 245 |
} |
243 |
- |
|
244 | 246 |
} |
245 | 247 |
|
246 | 248 |
func (d *ClusterRegistry) checkRegistryEndpoints(pods []*kapi.Pod, r types.DiagnosticResult) bool { |