Signed-off-by: Rob Murray <rob.murray@docker.com>
| ... | ... |
@@ -261,6 +261,10 @@ func (daemon *Daemon) create(ctx context.Context, daemonCfg *config.Config, opts |
| 261 | 261 |
} |
| 262 | 262 |
daemon.updateContainerNetworkSettings(ctr, endpointsConfigs) |
| 263 | 263 |
|
| 264 |
+ if err := daemon.nri.CreateContainer(ctx, ctr); err != nil {
|
|
| 265 |
+ return nil, err |
|
| 266 |
+ } |
|
| 267 |
+ |
|
| 264 | 268 |
if err := daemon.registerMountPoints(ctr, opts.params.DefaultReadOnlyNonRecursive); err != nil {
|
| 265 | 269 |
return nil, err |
| 266 | 270 |
} |
| ... | ... |
@@ -1,3 +1,19 @@ |
| 1 |
+// Package nri integrates the daemon with the NRI (Node Resource Interface) framework. |
|
| 2 |
+// |
|
| 3 |
+// NRI allows external plugins to observe and adjust container resources and settings |
|
| 4 |
+// at creation time, and to observe container lifecycle events. These plugins run with |
|
| 5 |
+// the same level of trust as the daemon itself, because they can make arbitrary |
|
| 6 |
+// modifications to container settings. |
|
| 7 |
+// |
|
| 8 |
+// The NRI framework is implemented by https://github.com/containerd/nri - see that |
|
| 9 |
+// package for more details about NRI and the framework. |
|
| 10 |
+// |
|
| 11 |
+// Plugins are long-running processes (not instantiated per-request like runtime shims, |
|
| 12 |
+// so they can maintain state across container events). They can either be started by |
|
| 13 |
+// the NRI framework itself, which is configured with directories to search for plugins |
|
| 14 |
+// and config for those plugins. Or, plugins can be started independently, and connect to the |
|
| 15 |
+// daemon via a listening socket. By default, the listening socket is disabled in this |
|
| 16 |
+// implementation. |
|
| 1 | 17 |
package nri |
| 2 | 18 |
|
| 3 | 19 |
import ( |
| ... | ... |
@@ -5,6 +21,7 @@ import ( |
| 5 | 5 |
"errors" |
| 6 | 6 |
"fmt" |
| 7 | 7 |
"path/filepath" |
| 8 |
+ "strings" |
|
| 8 | 9 |
"sync" |
| 9 | 10 |
|
| 10 | 11 |
"github.com/containerd/log" |
| ... | ... |
@@ -43,6 +60,7 @@ type Config struct {
|
| 43 | 43 |
ContainerLister ContainerLister |
| 44 | 44 |
} |
| 45 | 45 |
|
| 46 |
+// NewNRI creates and starts a new NRI instance based on the provided configuration. |
|
| 46 | 47 |
func NewNRI(ctx context.Context, cfg Config) (*NRI, error) {
|
| 47 | 48 |
n := &NRI{cfg: cfg}
|
| 48 | 49 |
if !n.cfg.DaemonConfig.Enable {
|
| ... | ... |
@@ -71,6 +89,7 @@ func NewNRI(ctx context.Context, cfg Config) (*NRI, error) {
|
| 71 | 71 |
return n, nil |
| 72 | 72 |
} |
| 73 | 73 |
|
| 74 |
+// Shutdown stops the NRI instance and releases its resources. |
|
| 74 | 75 |
func (n *NRI) Shutdown(ctx context.Context) {
|
| 75 | 76 |
n.mu.Lock() |
| 76 | 77 |
defer n.mu.Unlock() |
| ... | ... |
@@ -82,10 +101,76 @@ func (n *NRI) Shutdown(ctx context.Context) {
|
| 82 | 82 |
n.nri = nil |
| 83 | 83 |
} |
| 84 | 84 |
|
| 85 |
+// CreateContainer notifies plugins of a "creation" NRI-lifecycle event for a container, |
|
| 86 |
+// allowing the plugin to adjust settings before the container is created. |
|
| 87 |
+func (n *NRI) CreateContainer(ctx context.Context, ctr *container.Container) error {
|
|
| 88 |
+ n.mu.RLock() |
|
| 89 |
+ defer n.mu.RUnlock() |
|
| 90 |
+ if n.nri == nil {
|
|
| 91 |
+ return nil |
|
| 92 |
+ } |
|
| 93 |
+ // ctr.State can safely be locked here, but there's no need because it's expected |
|
| 94 |
+ // to be newly created and not yet accessible in any other thread. |
|
| 95 |
+ |
|
| 96 |
+ nriPod, nriCtr, err := containerToNRI(ctr) |
|
| 97 |
+ if err != nil {
|
|
| 98 |
+ return err |
|
| 99 |
+ } |
|
| 100 |
+ |
|
| 101 |
+ // TODO(robmry): call RunPodSandbox? |
|
| 102 |
+ |
|
| 103 |
+ resp, err := n.nri.CreateContainer(ctx, &adaptation.CreateContainerRequest{
|
|
| 104 |
+ Pod: nriPod, |
|
| 105 |
+ Container: nriCtr, |
|
| 106 |
+ }) |
|
| 107 |
+ if err != nil {
|
|
| 108 |
+ return err |
|
| 109 |
+ } |
|
| 110 |
+ |
|
| 111 |
+ if resp.GetUpdate() != nil {
|
|
| 112 |
+ return errors.New("container update is not supported")
|
|
| 113 |
+ } |
|
| 114 |
+ if err := applyAdjustments(ctx, ctr, resp.GetAdjust()); err != nil {
|
|
| 115 |
+ return err |
|
| 116 |
+ } |
|
| 117 |
+ return nil |
|
| 118 |
+} |
|
| 119 |
+ |
|
| 120 |
+// syncFn is called when a plugin registers, allowing the plugin to learn the |
|
| 121 |
+// current state of all containers. |
|
| 85 | 122 |
func (n *NRI) syncFn(ctx context.Context, syncCB adaptation.SyncCB) error {
|
| 123 |
+ // Claim a write lock so containers can't be created/removed until sync is done. |
|
| 124 |
+ // The plugin will get create/remove events after the sync, so won't miss anything. |
|
| 125 |
+ // |
|
| 126 |
+ // If a container's state changes during the sync, the plugin may see already-modified |
|
| 127 |
+ // state, then get a change notification with no changes. |
|
| 128 |
+ n.mu.Lock() |
|
| 129 |
+ defer n.mu.Unlock() |
|
| 130 |
+ |
|
| 131 |
+ containers := n.cfg.ContainerLister.List() |
|
| 132 |
+ nriPods := make([]*adaptation.PodSandbox, 0, len(containers)) |
|
| 133 |
+ nriCtrs := make([]*adaptation.Container, 0, len(containers)) |
|
| 134 |
+ for _, ctr := range containers {
|
|
| 135 |
+ ctr.State.Lock() |
|
| 136 |
+ nriPod, nriCtr, err := containerToNRI(ctr) |
|
| 137 |
+ ctr.State.Unlock() |
|
| 138 |
+ if err != nil {
|
|
| 139 |
+ return fmt.Errorf("converting container %s to NRI: %w", ctr.ID, err)
|
|
| 140 |
+ } |
|
| 141 |
+ nriPods = append(nriPods, nriPod) |
|
| 142 |
+ nriCtrs = append(nriCtrs, nriCtr) |
|
| 143 |
+ } |
|
| 144 |
+ updates, err := syncCB(ctx, nriPods, nriCtrs) |
|
| 145 |
+ if err != nil {
|
|
| 146 |
+ return fmt.Errorf("synchronizing NRI state: %w", err)
|
|
| 147 |
+ } |
|
| 148 |
+ if len(updates) > 0 {
|
|
| 149 |
+ return errors.New("container updates during sync are not implemented")
|
|
| 150 |
+ } |
|
| 86 | 151 |
return nil |
| 87 | 152 |
} |
| 88 | 153 |
|
| 154 |
+// updateFn may be called asynchronously by plugins. |
|
| 89 | 155 |
func (n *NRI) updateFn(context.Context, []*adaptation.ContainerUpdate) ([]*adaptation.ContainerUpdate, error) {
|
| 90 | 156 |
return nil, errors.New("not implemented")
|
| 91 | 157 |
} |
| ... | ... |
@@ -128,3 +213,95 @@ func nriOptions(cfg opts.NRIOpts) []adaptation.Option {
|
| 128 | 128 |
} |
| 129 | 129 |
return res |
| 130 | 130 |
} |
| 131 |
+ |
|
| 132 |
+func containerToNRI(ctr *container.Container) (*adaptation.PodSandbox, *adaptation.Container, error) {
|
|
| 133 |
+ // TODO(robmry) - this implementation is incomplete, most fields are not populated. |
|
| 134 |
+ // |
|
| 135 |
+ // Many of these fields have straightforward mappings from Docker container fields, |
|
| 136 |
+ // but each will need consideration and tests for both outgoing settings and |
|
| 137 |
+ // adjutments from plugins. |
|
| 138 |
+ // |
|
| 139 |
+ // Docker doesn't have pods - but PodSandbox is how plugins will learn the container's |
|
| 140 |
+ // network namespace. So, the intent is to represent each container as having its own |
|
| 141 |
+ // PodSandbox, with the same ID and lifecycle as the container. We can probably represent |
|
| 142 |
+ // container-networking as containers sharing a pod. |
|
| 143 |
+ nriPod := &adaptation.PodSandbox{
|
|
| 144 |
+ Id: ctr.ID, |
|
| 145 |
+ Name: ctr.Name, |
|
| 146 |
+ Uid: "", |
|
| 147 |
+ Namespace: "", |
|
| 148 |
+ Labels: nil, |
|
| 149 |
+ Annotations: nil, |
|
| 150 |
+ RuntimeHandler: "", |
|
| 151 |
+ Linux: nil, |
|
| 152 |
+ Pid: 0, |
|
| 153 |
+ Ips: nil, |
|
| 154 |
+ } |
|
| 155 |
+ |
|
| 156 |
+ nriCtr := &adaptation.Container{
|
|
| 157 |
+ Id: ctr.ID, |
|
| 158 |
+ PodSandboxId: ctr.ID, |
|
| 159 |
+ Name: ctr.Name, |
|
| 160 |
+ State: adaptation.ContainerState_CONTAINER_UNKNOWN, |
|
| 161 |
+ Labels: ctr.Config.Labels, |
|
| 162 |
+ Annotations: ctr.HostConfig.Annotations, |
|
| 163 |
+ Args: ctr.Config.Cmd, |
|
| 164 |
+ Env: ctr.Config.Env, |
|
| 165 |
+ Hooks: nil, |
|
| 166 |
+ Linux: &adaptation.LinuxContainer{
|
|
| 167 |
+ Namespaces: nil, |
|
| 168 |
+ Devices: nil, |
|
| 169 |
+ Resources: nil, |
|
| 170 |
+ OomScoreAdj: nil, |
|
| 171 |
+ CgroupsPath: "", |
|
| 172 |
+ IoPriority: nil, |
|
| 173 |
+ SeccompProfile: nil, |
|
| 174 |
+ SeccompPolicy: nil, |
|
| 175 |
+ }, |
|
| 176 |
+ Mounts: nil, |
|
| 177 |
+ Pid: uint32(ctr.Pid), |
|
| 178 |
+ Rlimits: nil, |
|
| 179 |
+ CreatedAt: 0, |
|
| 180 |
+ StartedAt: 0, |
|
| 181 |
+ FinishedAt: 0, |
|
| 182 |
+ ExitCode: 0, |
|
| 183 |
+ StatusReason: "", |
|
| 184 |
+ StatusMessage: "", |
|
| 185 |
+ CDIDevices: nil, |
|
| 186 |
+ } |
|
| 187 |
+ return nriPod, nriCtr, nil |
|
| 188 |
+} |
|
| 189 |
+ |
|
| 190 |
+func applyAdjustments(ctx context.Context, ctr *container.Container, adj *adaptation.ContainerAdjustment) error {
|
|
| 191 |
+ if adj == nil {
|
|
| 192 |
+ return nil |
|
| 193 |
+ } |
|
| 194 |
+ if err := applyEnvVars(ctx, ctr, adj.Env); err != nil {
|
|
| 195 |
+ return fmt.Errorf("applying environment variable adjustments: %w", err)
|
|
| 196 |
+ } |
|
| 197 |
+ return nil |
|
| 198 |
+} |
|
| 199 |
+ |
|
| 200 |
+func applyEnvVars(ctx context.Context, ctr *container.Container, envVars []*adaptation.KeyValue) error {
|
|
| 201 |
+ if len(envVars) == 0 {
|
|
| 202 |
+ return nil |
|
| 203 |
+ } |
|
| 204 |
+ existing := make(map[string]int, len(ctr.Config.Env)) |
|
| 205 |
+ for i, e := range ctr.Config.Env {
|
|
| 206 |
+ k, _, _ := strings.Cut(e, "=") |
|
| 207 |
+ existing[k] = i |
|
| 208 |
+ } |
|
| 209 |
+ for _, kv := range envVars {
|
|
| 210 |
+ if kv.Key == "" {
|
|
| 211 |
+ return errors.New("empty environment variable key")
|
|
| 212 |
+ } |
|
| 213 |
+ val := kv.Key + "=" + kv.Value |
|
| 214 |
+ log.G(ctx).Debugf("Applying NRI env var adjustment to %s", kv.Key)
|
|
| 215 |
+ if i, found := existing[kv.Key]; found {
|
|
| 216 |
+ ctr.Config.Env[i] = val |
|
| 217 |
+ } else {
|
|
| 218 |
+ ctr.Config.Env = append(ctr.Config.Env, val) |
|
| 219 |
+ } |
|
| 220 |
+ } |
|
| 221 |
+ return nil |
|
| 222 |
+} |