network.go
5cecd548
 package docker
 
 import (
e0e49b9a
 	"encoding/binary"
797bb6e7
 	"errors"
5039d4a2
 	"fmt"
930e9a7e
 	"io"
799ffa17
 	"log"
5cecd548
 	"net"
799ffa17
 	"os/exec"
 	"strconv"
 	"strings"
a5fb1d6c
 	"sync"
5cecd548
 )
 
f39af7e0
 // NetworkBridgeIface holds the name of a network bridge interface.
 // NOTE(review): nothing in this file reads or assigns it; presumably it is
 // populated from the daemon's configuration elsewhere — confirm.
 var NetworkBridgeIface string
 
5cecd548
 const (
8cf30395
 	// DefaultNetworkBridge is a bridge interface name.
 	// NOTE(review): not referenced in this file; presumably the bridge used
 	// when none is configured — confirm.
 	DefaultNetworkBridge = "docker0"
1b370f9d
 	// portRangeStart and portRangeEnd delimit the port numbers that
 	// PortAllocator.runFountain cycles through for dynamic allocation.
 	portRangeStart       = 49153
 	portRangeEnd         = 65535
5cecd548
 )
 
799ffa17
 // networkRange returns the first and last IP addresses of an IPv4 network:
 // the network address (all host bits cleared) and the address with all host
 // bits set. Both results are 4-byte slices.
 //
 // The mask may be given in its 4-byte form or in the 16-byte form whose first
 // 12 bytes are 0xff. If network is not an IPv4 network (the address has no
 // 4-byte form, or the mask does not fit it) both results are nil instead of
 // the function panicking on an out-of-range index.
 func networkRange(network *net.IPNet) (net.IP, net.IP) {
 	netIP := network.IP.To4()
 	if netIP == nil {
 		return nil, nil
 	}
 	// IP.Mask returns nil when the mask length is incompatible with the address.
 	firstIP := netIP.Mask(network.Mask)
 	if firstIP == nil {
 		return nil, nil
 	}
 	// Only the trailing four bytes of the mask apply to an IPv4 address.
 	mask := network.Mask[len(network.Mask)-net.IPv4len:]
 	lastIP := make(net.IP, net.IPv4len)
 	for i := range lastIP {
 		lastIP[i] = netIP[i] | ^mask[i]
 	}
 	return firstIP, lastIP
 }
 
90a6e310
 // networkOverlaps reports whether netX and netY have addresses in common.
 // The check is symmetric: the networks overlap when either one contains the
 // first address of the other.
 func networkOverlaps(netX *net.IPNet, netY *net.IPNet) bool {
 	if startX, _ := networkRange(netX); netY.Contains(startX) {
 		return true
 	}
 	startY, _ := networkRange(netY)
 	return netX.Contains(startY)
 }
 
799ffa17
 // ipToInt packs an IPv4 address into a 32-bit integer, most significant byte
 // first. Because the result is signed, addresses from 128.0.0.0 upwards map
 // to negative values. It panics if ip has no 4-byte representation.
 func ipToInt(ip net.IP) int32 {
 	v4 := ip.To4()
 	packed := binary.BigEndian.Uint32(v4)
 	return int32(packed)
 }
 
799ffa17
 // intToIp is the inverse of ipToInt: it unpacks a 32-bit integer into a
 // 4-byte IP address, most significant byte first.
 func intToIp(n int32) net.IP {
 	var raw [4]byte
 	binary.BigEndian.PutUint32(raw[:], uint32(n))
 	return net.IP(raw[:])
 }
 
799ffa17
 // networkSize returns how many addresses an IPv4 netmask spans, counting the
 // network and broadcast addresses (a /24 mask yields 256). Only the first
 // four bytes of mask are read.
 func networkSize(mask net.IPMask) int32 {
 	hostBits := make(net.IPMask, net.IPv4len)
 	for i := range hostBits {
 		hostBits[i] = ^mask[i]
 	}
 	// The inverted mask is the highest host offset; add one to get the count.
 	return int32(binary.BigEndian.Uint32(hostBits)) + 1
 }
 
aa4bf428
 // ip runs the `ip` command with the given arguments and returns its combined
 // standard output and standard error.
 //
 // An error is returned when the binary cannot be found in PATH or when the
 // command fails. On failure the captured output is still returned so callers
 // can include it in their own messages, and the error carries the underlying
 // cause (for example the exit status).
 func ip(args ...string) (string, error) {
 	path, err := exec.LookPath("ip")
 	if err != nil {
 		return "", fmt.Errorf("command not found: ip")
 	}
 	output, err := exec.Command(path, args...).CombinedOutput()
 	if err != nil {
 		return string(output), fmt.Errorf("ip failed: ip %v: %v", strings.Join(args, " "), err)
 	}
 	return string(output), nil
 }
 
799ffa17
 // iptables runs the `iptables` command with the given arguments.
 //
 // An error is returned when the binary cannot be found in PATH or when the
 // command fails; in the latter case the error includes the underlying cause
 // and whatever the command printed, so failures can be diagnosed.
 func iptables(args ...string) error {
 	path, err := exec.LookPath("iptables")
 	if err != nil {
 		return fmt.Errorf("command not found: iptables")
 	}
 	if output, err := exec.Command(path, args...).CombinedOutput(); err != nil {
 		return fmt.Errorf("iptables failed: iptables %v: %v (output: %s)",
 			strings.Join(args, " "), err, strings.TrimSpace(string(output)))
 	}
 	return nil
 }
 
aa4bf428
 // checkRouteOverlaps inspects the output of `ip route` and returns an error
 // if dockerNetwork collides with a destination that is already routed.
 //
 // Blank lines and lines mentioning "default" are skipped. The first
 // space-separated field of every other line is taken as the destination: a
 // CIDR destination collides when the two networks overlap, a mask-less IPv4
 // host destination collides when dockerNetwork contains it, and mask-less
 // IPv6 destinations are ignored. A destination that is neither a CIDR nor an
 // IP address is reported as unexpected output.
 func checkRouteOverlaps(dockerNetwork *net.IPNet) error {
 	output, err := ip("route")
 	if err != nil {
 		return err
 	}
 	Debugf("Routes:\n\n%s", output)
 	for _, line := range strings.Split(output, "\n") {
 		if strings.Trim(line, "\r\n\t ") == "" || strings.Contains(line, "default") {
 			continue
 		}
 		dest := strings.Split(line, " ")[0]
 		_, network, err := net.ParseCIDR(dest)
 		if err != nil {
 			// Host routes are printed without a prefix length.
 			hostIP := net.ParseIP(dest)
 			switch {
 			case hostIP == nil:
 				return fmt.Errorf("Unexpected ip route output: %s (%s)", err, line)
 			case hostIP.To4() == nil:
 				// IPv6 destinations cannot collide with the IPv4 bridge network.
 			case dockerNetwork.Contains(hostIP):
 				return fmt.Errorf("Network %s is already routed: '%s'", dockerNetwork.String(), line)
 			}
 			continue
 		}
 		if networkOverlaps(dockerNetwork, network) {
 			return fmt.Errorf("Network %s is already routed: '%s'", dockerNetwork.String(), line)
 		}
 	}
 	return nil
 }
 
 func CreateBridgeIface(ifaceName string) error {
1601366c
 	// FIXME: try more IP ranges
 	// FIXME: try bigger ranges! /24 is too small.
aa4bf428
 	addrs := []string{"172.16.42.1/24", "10.0.42.1/24", "192.168.42.1/24"}
 
 	var ifaceAddr string
 	for _, addr := range addrs {
 		_, dockerNetwork, err := net.ParseCIDR(addr)
 		if err != nil {
 			return err
 		}
 		if err := checkRouteOverlaps(dockerNetwork); err == nil {
 			ifaceAddr = addr
 			break
 		} else {
 			Debugf("%s: %s", addr, err)
 		}
 	}
 	if ifaceAddr == "" {
1601366c
 		return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", ifaceName, ifaceName)
aa4bf428
 	} else {
 		Debugf("Creating bridge %s with network %s", ifaceName, ifaceAddr)
 	}
 
 	if output, err := ip("link", "add", ifaceName, "type", "bridge"); err != nil {
 		return fmt.Errorf("Error creating bridge: %s (output: %s)", err, output)
 	}
 
 	if output, err := ip("addr", "add", ifaceAddr, "dev", ifaceName); err != nil {
 		return fmt.Errorf("Unable to add private network: %s (%s)", err, output)
 	}
 	if output, err := ip("link", "set", ifaceName, "up"); err != nil {
 		return fmt.Errorf("Unable to start network bridge: %s (%s)", err, output)
 	}
 	if err := iptables("-t", "nat", "-A", "POSTROUTING", "-s", ifaceAddr,
 		"!", "-d", ifaceAddr, "-j", "MASQUERADE"); err != nil {
 		return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
 	}
 	return nil
 }
 
799ffa17
 // getIfaceAddr returns the IPv4 address (as a *net.IPNet) of the network
 // interface called name.
 //
 // Addresses that are not *net.IPNet values or that have no 4-byte form are
 // skipped. If the interface carries several IPv4 addresses the first one is
 // returned and a notice is printed on standard output. An error is returned
 // when the interface cannot be looked up, its addresses cannot be listed, or
 // it has no IPv4 address at all.
 func getIfaceAddr(name string) (net.Addr, error) {
 	iface, err := net.InterfaceByName(name)
 	if err != nil {
 		return nil, err
 	}
 	addrs, err := iface.Addrs()
 	if err != nil {
 		return nil, err
 	}
 	var addrs4 []net.Addr
 	for _, addr := range addrs {
 		// Checked assertion: an unexpected address type is skipped, not a panic.
 		ipNet, ok := addr.(*net.IPNet)
 		if !ok {
 			continue
 		}
 		if ipNet.IP.To4() != nil {
 			addrs4 = append(addrs4, addr)
 		}
 	}
 	switch {
 	case len(addrs4) == 0:
 		return nil, fmt.Errorf("Interface %v has no IP addresses", name)
 	case len(addrs4) > 1:
 		fmt.Printf("Interface %v has more than 1 IPv4 address. Defaulting to using %v\n",
 			name, (addrs4[0].(*net.IPNet)).IP)
 	}
 	return addrs4[0], nil
 }
 
799ffa17
 // PortMapper takes care of mapping external ports to containers by setting
 // up iptables rules.
 // It keeps track of all mappings and is able to unmap at will.
 type PortMapper struct {
 	// mapping records, for each mapped external port, the destination address
 	// its traffic is forwarded to.
 	mapping map[int]net.TCPAddr
7f1a32b9
 	// proxies holds, for each mapped external port, the listener bound to
 	// 127.0.0.1:<port> that Map starts a proxy on.
 	proxies map[int]net.Listener
799ffa17
 }
 
 func (mapper *PortMapper) cleanup() error {
 	// Ignore errors - This could mean the chains were never set up
3b65be91
 	iptables("-t", "nat", "-D", "PREROUTING", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER")
61259ab4
 	iptables("-t", "nat", "-D", "OUTPUT", "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", "127.0.0.0/8", "-j", "DOCKER")
 	iptables("-t", "nat", "-D", "OUTPUT", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER") // Created in versions <= 0.1.6
ebc83795
 	// Also cleanup rules created by older versions, or -X might fail.
 	iptables("-t", "nat", "-D", "PREROUTING", "-j", "DOCKER")
 	iptables("-t", "nat", "-D", "OUTPUT", "-j", "DOCKER")
799ffa17
 	iptables("-t", "nat", "-F", "DOCKER")
 	iptables("-t", "nat", "-X", "DOCKER")
 	mapper.mapping = make(map[int]net.TCPAddr)
7f1a32b9
 	mapper.proxies = make(map[int]net.Listener)
799ffa17
 	return nil
 }
 
 func (mapper *PortMapper) setup() error {
 	if err := iptables("-t", "nat", "-N", "DOCKER"); err != nil {
523803d6
 		return fmt.Errorf("Failed to create DOCKER chain: %s", err)
5039d4a2
 	}
3b65be91
 	if err := iptables("-t", "nat", "-A", "PREROUTING", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER"); err != nil {
523803d6
 		return fmt.Errorf("Failed to inject docker in PREROUTING chain: %s", err)
799ffa17
 	}
61259ab4
 	if err := iptables("-t", "nat", "-A", "OUTPUT", "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", "127.0.0.0/8", "-j", "DOCKER"); err != nil {
523803d6
 		return fmt.Errorf("Failed to inject docker in OUTPUT chain: %s", err)
3c6b8bb8
 	}
799ffa17
 	return nil
 }
 
 // iptablesForward applies rule (an iptables action flag such as "-A" or
 // "-D") to the DNAT entry in the DOCKER chain that redirects TCP traffic
 // arriving on port to dest.
 func (mapper *PortMapper) iptablesForward(rule string, port int, dest net.TCPAddr) error {
 	frontend := strconv.Itoa(port)
 	backend := net.JoinHostPort(dest.IP.String(), strconv.Itoa(dest.Port))
 	return iptables("-t", "nat", rule, "DOCKER", "-p", "tcp", "--dport", frontend,
 		"-j", "DNAT", "--to-destination", backend)
 }
 
 func (mapper *PortMapper) Map(port int, dest net.TCPAddr) error {
 	if err := mapper.iptablesForward("-A", port, dest); err != nil {
 		return err
 	}
930e9a7e
 
799ffa17
 	mapper.mapping[port] = dest
930e9a7e
 	listener, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port))
 	if err != nil {
 		mapper.Unmap(port)
 		return err
 	}
7f1a32b9
 	mapper.proxies[port] = listener
930e9a7e
 	go proxy(listener, "tcp", dest.String())
799ffa17
 	return nil
 }
c08f5b2b
 
930e9a7e
 // proxy listens for socket connections on `listener`, and forwards them unmodified
 // to `proto:address`.
 //
 // It loops until Accept fails (for instance because the listener was
 // closed) and returns that error. A failure to reach the backend only drops
 // the offending client connection; the loop keeps accepting.
 func proxy(listener net.Listener, proto, address string) error {
 	Debugf("proxying to %s:%s", proto, address)
 	defer Debugf("Done proxying to %s:%s", proto, address)
 	for {
 		// Log the bound address: formatting the listener itself with %s
 		// produces a "%!s(...)" artifact rather than anything readable.
 		Debugf("Listening on %s", listener.Addr())
 		src, err := listener.Accept()
 		if err != nil {
 			return err
 		}
 		Debugf("Connecting to %s:%s", proto, address)
 		dst, err := net.Dial(proto, address)
 		if err != nil {
 			log.Printf("Error connecting to %s:%s: %s", proto, address, err)
 			src.Close()
 			continue
 		}
 		Debugf("Connected to backend, splicing")
 		splice(src, dst)
 	}
 }
 
 // halfSplice copies everything readable from src into dst, then closes both
 // connections (dst first, then src) and returns the copy error, if any.
 func halfSplice(dst, src net.Conn) error {
 	// FIXME: on EOF from a tcp connection, pass WriteClose()
 	// Deferred calls run last-in first-out, so dst is closed before src.
 	defer src.Close()
 	defer dst.Close()
 	_, copyErr := io.Copy(dst, src)
 	return copyErr
 }
 
 // splice relays traffic between a and b in both directions, using one
 // halfSplice goroutine per direction. It returns immediately.
 func splice(a, b net.Conn) {
 	for _, pair := range [2][2]net.Conn{{a, b}, {b, a}} {
 		go halfSplice(pair[0], pair[1])
 	}
 }
 
799ffa17
 func (mapper *PortMapper) Unmap(port int) error {
 	dest, ok := mapper.mapping[port]
 	if !ok {
 		return errors.New("Port is not mapped")
 	}
7f1a32b9
 	if proxy, exists := mapper.proxies[port]; exists {
 		proxy.Close()
 		delete(mapper.proxies, port)
 	}
799ffa17
 	if err := mapper.iptablesForward("-D", port, dest); err != nil {
 		return err
e0e49b9a
 	}
799ffa17
 	delete(mapper.mapping, port)
 	return nil
 }
 
 func newPortMapper() (*PortMapper, error) {
 	mapper := &PortMapper{}
 	if err := mapper.cleanup(); err != nil {
c08f5b2b
 		return nil, err
5cecd548
 	}
799ffa17
 	if err := mapper.setup(); err != nil {
 		return nil, err
 	}
 	return mapper, nil
5cecd548
 }
797bb6e7
 
799ffa17
 // PortAllocator automatically allocates and releases networking ports.
 type PortAllocator struct {
2aad4a34
 	// inUse is the set of ports currently allocated; guarded by lock.
 	inUse    map[int]struct{}
a5fb1d6c
 	// fountain is fed by runFountain with candidate port numbers for dynamic
 	// allocation.
 	fountain chan (int)
2aad4a34
 	// lock serializes access to inUse in Acquire and Release.
 	lock     sync.Mutex
797bb6e7
 }
 
a5fb1d6c
 func (alloc *PortAllocator) runFountain() {
 	for {
 		for port := portRangeStart; port < portRangeEnd; port++ {
 			alloc.fountain <- port
 		}
797bb6e7
 	}
 }
 
a5fb1d6c
 // FIXME: Release can no longer fail, change its prototype to reflect that.
799ffa17
 func (alloc *PortAllocator) Release(port int) error {
febaeebf
 	Debugf("Releasing %d", port)
a5fb1d6c
 	alloc.lock.Lock()
 	delete(alloc.inUse, port)
 	alloc.lock.Unlock()
 	return nil
 }
 
 func (alloc *PortAllocator) Acquire(port int) (int, error) {
febaeebf
 	Debugf("Acquiring %d", port)
a5fb1d6c
 	if port == 0 {
 		// Allocate a port from the fountain
 		for port := range alloc.fountain {
 			if _, err := alloc.Acquire(port); err == nil {
 				return port, nil
 			}
 		}
 		return -1, fmt.Errorf("Port generator ended unexpectedly")
797bb6e7
 	}
a5fb1d6c
 	alloc.lock.Lock()
 	defer alloc.lock.Unlock()
 	if _, inUse := alloc.inUse[port]; inUse {
 		return -1, fmt.Errorf("Port already in use: %d", port)
 	}
 	alloc.inUse[port] = struct{}{}
 	return port, nil
797bb6e7
 }
 
a5fb1d6c
 func newPortAllocator() (*PortAllocator, error) {
 	allocator := &PortAllocator{
22893429
 		inUse:    make(map[int]struct{}),
d32f1846
 		fountain: make(chan int),
a5fb1d6c
 	}
 	go allocator.runFountain()
799ffa17
 	return allocator, nil
 }
 
 // IPAllocator automatically allocates and releases IP addresses within a
 // network. All state is owned by the run goroutine; Acquire and Release talk
 // to it through the two channels.
 type IPAllocator struct {
6f9a67a7
 	// network is the network addresses are allocated from.
 	network       *net.IPNet
 	// queueAlloc delivers the next allocation result to Acquire.
 	queueAlloc    chan allocatedIP
 	// queueReleased carries addresses handed back through Release.
 	queueReleased chan net.IP
 	// inUse is the set of allocated addresses, keyed by their ipToInt value.
 	inUse         map[int32]struct{}
 }
 
 // allocatedIP is the message sent over IPAllocator.queueAlloc: the outcome
 // of one allocation attempt.
 type allocatedIP struct {
 	// ip is the candidate address.
 	ip  net.IP
 	// err is non-nil when no free address could be found.
 	err error
799ffa17
 }
 
6f9a67a7
 // run is the allocator's event loop; it never returns and is started in its
 // own goroutine by newIPAllocator. Each iteration picks the next candidate
 // address, then waits until either an Acquire caller takes it or a Release
 // hands an address back.
 //
 // NOTE(review): for masks spanning fewer than three addresses max is <= 0,
 // so the search loop is skipped and address 0 is offered — confirm callers
 // never pass such networks.
 func (alloc *IPAllocator) run() {
799ffa17
 	firstIP, _ := networkRange(alloc.network)
6f9a67a7
 	// ipNum is the network's first address as an integer; candidates are
 	// expressed as ipNum + pos.
 	ipNum := ipToInt(firstIP)
 	ownIP := ipToInt(alloc.network.IP)
 	size := networkSize(alloc.network.Mask)
 
 	// pos is the offset of the next candidate; it cycles through 1..max.
 	pos := int32(1)
 	max := size - 2 // -1 for the broadcast address, -1 for the gateway address
 	for {
 		var (
 			newNum int32
 			inUse  bool
 		)
 
 		// Find first unused IP, give up after one whole round
 		for attempt := int32(0); attempt < max; attempt++ {
 			newNum = ipNum + pos
 
 			// Advance to the next offset, wrapping from max back to 1.
 			pos = pos%max + 1
 
 			// The network's IP is never okay to use
 			// NOTE(review): inUse keeps its value from the previous attempt
 			// when this branch is the last one taken in a round — confirm
 			// that is intended.
 			if newNum == ownIP {
 				continue
 			}
 
 			if _, inUse = alloc.inUse[newNum]; !inUse {
 				// We found an unused IP
 				break
 			}
797bb6e7
 		}
6f9a67a7
 
 		// Prepare the answer for the next Acquire; it carries an error when
 		// the whole round found nothing free.
 		ip := allocatedIP{ip: intToIp(newNum)}
 		if inUse {
 			ip.err = errors.New("No unallocated IP available")
797bb6e7
 		}
6f9a67a7
 
 		select {
 		case alloc.queueAlloc <- ip:
 			// An Acquire caller received the candidate: record it as taken.
 			alloc.inUse[newNum] = struct{}{}
 		case released := <-alloc.queueReleased:
 			r := ipToInt(released)
 			delete(alloc.inUse, r)
 
 			if inUse {
 				// If we couldn't allocate a new IP, the released one
 				// will be the only free one now, so instantly use it
 				// next time
 				pos = r - ipNum
 			} else {
 				// Use same IP as last time
 				// (step pos back by one, wrapping from 1 to max, to undo
 				// the advance made while searching)
 				if pos == 1 {
 					pos = max
 				} else {
 					pos--
 				}
 			}
797bb6e7
 		}
799ffa17
 	}
 }
 
 func (alloc *IPAllocator) Acquire() (net.IP, error) {
6f9a67a7
 	ip := <-alloc.queueAlloc
 	return ip.ip, ip.err
799ffa17
 }
 
6f9a67a7
 func (alloc *IPAllocator) Release(ip net.IP) {
 	alloc.queueReleased <- ip
797bb6e7
 }
 
6f9a67a7
 func newIPAllocator(network *net.IPNet) *IPAllocator {
799ffa17
 	alloc := &IPAllocator{
6f9a67a7
 		network:       network,
 		queueAlloc:    make(chan allocatedIP),
 		queueReleased: make(chan net.IP),
 		inUse:         make(map[int32]struct{}),
799ffa17
 	}
6f9a67a7
 
 	go alloc.run()
 
 	return alloc
799ffa17
 }
 
 // NetworkInterface represents the networking stack of a container.
 type NetworkInterface struct {
 	// IPNet is the address allocated to the interface, with the bridge
 	// network's mask.
 	IPNet   net.IPNet
 	// Gateway is the bridge network's own IP address.
 	Gateway net.IP
 
 	// manager is the NetworkManager that allocated this interface and that
 	// owns the allocators and port mapper it uses.
 	manager  *NetworkManager
 	// extPorts lists the external ports mapped through AllocatePort, so that
 	// Release can unmap and free them.
 	extPorts []int
 }
 
 // Allocate an external TCP port and map it to the interface
2aad4a34
 func (iface *NetworkInterface) AllocatePort(spec string) (*Nat, error) {
 	nat, err := parseNat(spec)
799ffa17
 	if err != nil {
2aad4a34
 		return nil, err
 	}
 	// Allocate a random port if Frontend==0
 	if extPort, err := iface.manager.portAllocator.Acquire(nat.Frontend); err != nil {
 		return nil, err
 	} else {
 		nat.Frontend = extPort
 	}
 	if err := iface.manager.portMapper.Map(nat.Frontend, net.TCPAddr{IP: iface.IPNet.IP, Port: nat.Backend}); err != nil {
 		iface.manager.portAllocator.Release(nat.Frontend)
 		return nil, err
 	}
 	iface.extPorts = append(iface.extPorts, nat.Frontend)
 	return nat, nil
 }
 
 // Nat describes one port mapping between the host and a container.
 type Nat struct {
 	// Proto is the transport protocol; parseNat always sets it to "tcp".
 	Proto string
 	// Frontend is the external port; 0 means "not chosen yet".
 	Frontend int
 	// Backend is the port inside the container.
 	Backend int
 }
 
 // parseNat parses a port specification of the form "PORT" or ":PORT".
 //
 // PORT is a decimal number that fits in 16 bits and becomes the Backend
 // port. With the leading ':' the Frontend port is set to the same value;
 // without it Frontend is left at 0 so that a port can be picked later.
 // An empty or malformed spec yields an error rather than a panic.
 func parseNat(spec string) (*Nat, error) {
 	var nat Nat
 	// If spec starts with ':', external and internal ports must be the same.
 	// This might fail if the requested external port is not available.
 	var sameFrontend bool
 	if strings.HasPrefix(spec, ":") {
 		sameFrontend = true
 		spec = spec[1:]
 	}
 	port, err := strconv.ParseUint(spec, 10, 16)
 	if err != nil {
 		return nil, err
 	}
 	nat.Backend = int(port)
 	if sameFrontend {
 		nat.Frontend = nat.Backend
 	}
 	nat.Proto = "tcp"
 	return &nat, nil
 }
 
 // Release: Network cleanup - release all resources
6f9a67a7
 func (iface *NetworkInterface) Release() {
799ffa17
 	for _, port := range iface.extPorts {
 		if err := iface.manager.portMapper.Unmap(port); err != nil {
 			log.Printf("Unable to unmap port %v: %v", port, err)
 		}
 		if err := iface.manager.portAllocator.Release(port); err != nil {
 			log.Printf("Unable to release port %v: %v", port, err)
 		}
 
 	}
6f9a67a7
 
 	iface.manager.ipAllocator.Release(iface.IPNet.IP)
799ffa17
 }
 
 // NetworkManager manages a set of network interfaces.
 // Only *one* manager per host machine should be used.
 type NetworkManager struct {
 	// bridgeIface is the name of the bridge interface the manager was
 	// created for.
 	bridgeIface   string
 	// bridgeNetwork is the bridge interface's IPv4 address and mask.
 	bridgeNetwork *net.IPNet
 
 	// ipAllocator hands out addresses within bridgeNetwork.
 	ipAllocator   *IPAllocator
 	// portAllocator reserves external ports.
 	portAllocator *PortAllocator
 	// portMapper installs the forwarding for reserved ports.
 	portMapper    *PortMapper
 }
 
 // Allocate a network interface
 func (manager *NetworkManager) Allocate() (*NetworkInterface, error) {
 	ip, err := manager.ipAllocator.Acquire()
797bb6e7
 	if err != nil {
c08f5b2b
 		return nil, err
797bb6e7
 	}
c08f5b2b
 	iface := &NetworkInterface{
ab99e925
 		IPNet:   net.IPNet{IP: ip, Mask: manager.bridgeNetwork.Mask},
799ffa17
 		Gateway: manager.bridgeNetwork.IP,
 		manager: manager,
c08f5b2b
 	}
 	return iface, nil
 }
 
799ffa17
 func newNetworkManager(bridgeIface string) (*NetworkManager, error) {
 	addr, err := getIfaceAddr(bridgeIface)
 	if err != nil {
aa4bf428
 		// If the iface is not found, try to create it
 		if err := CreateBridgeIface(bridgeIface); err != nil {
 			return nil, err
 		}
 		addr, err = getIfaceAddr(bridgeIface)
 		if err != nil {
 			return nil, err
 		}
799ffa17
 	}
 	network := addr.(*net.IPNet)
 
6f9a67a7
 	ipAllocator := newIPAllocator(network)
799ffa17
 
a5fb1d6c
 	portAllocator, err := newPortAllocator()
799ffa17
 	if err != nil {
 		return nil, err
 	}
 
 	portMapper, err := newPortMapper()
37122552
 	if err != nil {
 		return nil, err
 	}
799ffa17
 
 	manager := &NetworkManager{
 		bridgeIface:   bridgeIface,
 		bridgeNetwork: network,
 		ipAllocator:   ipAllocator,
 		portAllocator: portAllocator,
 		portMapper:    portMapper,
 	}
 	return manager, nil
797bb6e7
 }