// +build linux,seccomp

package native

import "github.com/opencontainers/runc/libcontainer/configs"

var defaultSeccompProfile = &configs.Seccomp{
	DefaultAction: configs.Allow,
	Syscalls: []*configs.Syscall{
		{
			// Quota and Accounting syscalls which could let containers
			// disable their own resource limits or process accounting
			Name:   "acct",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from using the kernel keyring,
			// which is not namespaced
			Name:   "add_key",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Similar to clock_settime and settimeofday
			// Time/Date is not namespaced
			Name:   "adjtimex",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny loading potentially persistent bpf programs into kernel
			// already gated by CAP_SYS_ADMIN
			Name:   "bpf",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Time/Date is not namespaced
			Name:   "clock_adjtime",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Time/Date is not namespaced
			Name:   "clock_settime",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny cloning new namespaces
			Name:   "clone",
			Action: configs.Errno,
			Args: []*configs.Arg{
				{
					// flags from sched.h
					// CLONE_NEWUTS		0x04000000
					// CLONE_NEWIPC		0x08000000
					// CLONE_NEWUSER	0x10000000
					// CLONE_NEWPID		0x20000000
					// CLONE_NEWNET		0x40000000
					Index: 0,
					Value: uint64(0x04000000),
					Op:    configs.GreaterThanOrEqualTo,
				},
				{
					// flags from sched.h
					// CLONE_NEWNS		0x00020000
					Index: 0,
					Value: uint64(0x00020000),
					Op:    configs.EqualTo,
				},
			},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "create_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "delete_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "finit_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny retrieval of exported kernel and module symbols
			Name:   "get_kernel_syms",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "get_mempolicy",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "init_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from modifying kernel I/O privilege levels.
			// Already restricted as containers drop CAP_SYS_RAWIO by default.
			Name:   "ioperm",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from modifying kernel I/O privilege levels.
			// Already restricted as containers drop CAP_SYS_RAWIO by default.
			Name:   "iopl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Restrict process inspection capabilities
			// Already blocked by dropping CAP_PTRACE
			Name:   "kcmp",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Sister syscall of kexec_load that does the same thing,
			// slightly different arguments
			Name:   "kexec_file_load",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny loading a new kernel for later execution
			Name:   "kexec_load",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from using the kernel keyring,
			// which is not namespaced
			Name:   "keyctl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Tracing/profiling syscalls,
			// which could leak a lot of information on the host
			Name:   "lookup_dcookie",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "mbind",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "migrate_pages",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Old syscall only used in 16-bit code,
			// and a potential information leak
			Name:   "modify_ldt",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny mount
			Name:   "mount",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "move_pages",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny interaction with the kernel nfs daemon
			Name:   "nfsservctl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Cause of an old container breakout,
			// might as well restrict it to be on the safe side
			Name:   "open_by_handle_at",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Tracing/profiling syscalls,
			// which could leak a lot of information on the host
			Name:   "perf_event_open",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent container from enabling BSD emulation.
			// Not inherently dangerous, but poorly tested,
			// potential for a lot of kernel vulns in this.
			Name:   "personality",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny pivot_root
			Name:   "pivot_root",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Restrict process inspection capabilities
			// Already blocked by dropping CAP_PTRACE
			Name:   "process_vm_readv",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Restrict process modification capabilities
			// Already blocked by dropping CAP_PTRACE
			Name:   "process_vm_writev",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Already blocked by dropping CAP_PTRACE
			Name:   "ptrace",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "query_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Quota and Accounting syscalls which could let containers
			// disable their own resource limits or process accounting
			Name:   "quotactl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Probably a bad idea to let containers reboot the host
			Name:   "reboot",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Probably a bad idea to let containers restart a syscall.
			// Possible seccomp bypass, see: https://code.google.com/p/chromium/issues/detail?id=408827.
			Name:   "restart_syscall",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from using the kernel keyring,
			// which is not namespaced
			Name:   "request_key",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// meta, deny seccomp
			Name:   "seccomp",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "set_mempolicy",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// deny associating a thread with a namespace
			Name:   "setns",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Time/Date is not namespaced
			Name:   "settimeofday",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Time/Date is not namespaced
			Name:   "stime",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny start/stop swapping to file/device
			Name:   "swapon",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny start/stop swapping to file/device
			Name:   "swapoff",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny read/write system parameters
			Name:   "_sysctl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny umount
			Name:   "umount",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny umount
			Name:   "umount2",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Same as clone
			Name:   "unshare",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Older syscall related to shared libraries, unused for a long time
			Name:   "uselib",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// In kernel x86 real mode virtual machine
			Name:   "vm86",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// In kernel x86 real mode virtual machine
			Name:   "vm86old",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
	},
}