Without TCP keep-alive set on socket connections to the daemon, any
long-running container with std{out,err,in} attached that doesn't
read/write for a minute or longer will end in ETIMEDOUT (depending
on network settings/OS defaults, etc.), leaving the docker client side
believing it is still waiting on data with no actual underlying socket
connection.
This patch turns on TCP keep-alive for the underlying TCP connection
for both TLS and standard HTTP hijacked daemon connections from the
docker client, with a keep-alive period of 30 seconds.
Docker-DCO-1.1-Signed-off-by: Phil Estes <estesp@linux.vnet.ibm.com>
| ... | ... |
@@ -72,6 +72,15 @@ func tlsDialWithDialer(dialer *net.Dialer, network, addr string, config *tls.Con |
| 72 | 72 |
if err != nil {
|
| 73 | 73 |
return nil, err |
| 74 | 74 |
} |
| 75 |
+ // When we set up a TCP connection for hijack, there could be long periods |
|
| 76 |
+ // of inactivity (a long running command with no output) that in certain |
|
| 77 |
+ // network setups may cause ETIMEDOUT, leaving the client in an unknown |
|
| 78 |
+ // state. Setting TCP KeepAlive on the socket connection will prevent |
|
| 79 |
+ // ETIMEDOUT unless the socket connection truly is broken. |
|
| 80 |
+ if tcpConn, ok := rawConn.(*net.TCPConn); ok {
|
|
| 81 |
+ tcpConn.SetKeepAlive(true) |
|
| 82 |
+ tcpConn.SetKeepAlivePeriod(30 * time.Second) |
|
| 83 |
+ } |
|
| 75 | 84 |
|
| 76 | 85 |
colonPos := strings.LastIndex(addr, ":") |
| 77 | 86 |
if colonPos == -1 {
|
| ... | ... |
@@ -140,6 +149,15 @@ func (cli *DockerCli) hijack(method, path string, setRawTerminal bool, in io.Rea |
| 140 | 140 |
req.Host = cli.addr |
| 141 | 141 |
|
| 142 | 142 |
dial, err := cli.dial() |
| 143 |
+ // When we set up a TCP connection for hijack, there could be long periods |
|
| 144 |
+ // of inactivity (a long running command with no output) that in certain |
|
| 145 |
+ // network setups may cause ETIMEDOUT, leaving the client in an unknown |
|
| 146 |
+ // state. Setting TCP KeepAlive on the socket connection will prevent |
|
| 147 |
+ // ETIMEDOUT unless the socket connection truly is broken. |
|
| 148 |
+ if tcpConn, ok := dial.(*net.TCPConn); ok {
|
|
| 149 |
+ tcpConn.SetKeepAlive(true) |
|
| 150 |
+ tcpConn.SetKeepAlivePeriod(30 * time.Second) |
|
| 151 |
+ } |
|
| 143 | 152 |
if err != nil {
|
| 144 | 153 |
if strings.Contains(err.Error(), "connection refused") {
|
| 145 | 154 |
return fmt.Errorf("Cannot connect to the Docker daemon. Is 'docker -d' running on this host?")
|