Browse code

NRI: add ContainerCreate hook, allow env-var adjustments

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2025/11/29 01:36:51
Showing 2 changed files
... ...
@@ -261,6 +261,10 @@ func (daemon *Daemon) create(ctx context.Context, daemonCfg *config.Config, opts
261 261
 	}
262 262
 	daemon.updateContainerNetworkSettings(ctr, endpointsConfigs)
263 263
 
264
+	if err := daemon.nri.CreateContainer(ctx, ctr); err != nil {
265
+		return nil, err
266
+	}
267
+
264 268
 	if err := daemon.registerMountPoints(ctr, opts.params.DefaultReadOnlyNonRecursive); err != nil {
265 269
 		return nil, err
266 270
 	}
... ...
@@ -1,3 +1,19 @@
1
+// Package nri integrates the daemon with the NRI (Node Resource Interface) framework.
2
+//
3
+// NRI allows external plugins to observe and adjust container resources and settings
4
+// at creation time, and to observe container lifecycle events. These plugins run with
5
+// the same level of trust as the daemon itself, because they can make arbitrary
6
+// modifications to container settings.
7
+//
8
+// The NRI framework is implemented by https://github.com/containerd/nri - see that
9
+// package for more details about NRI and the framework.
10
+//
11
+// Plugins are long-running processes (not instantiated per-request like runtime shims,
12
+// so they can maintain state across container events). They can either be started by
13
+// the NRI framework itself, which is configured with directories to search for plugins
13
+// and config for those plugins, or they can be started independently and connect to the
15
+// daemon via a listening socket. By default, the listening socket is disabled in this
16
+// implementation.
1 17
 package nri
2 18
 
3 19
 import (
... ...
@@ -5,6 +21,7 @@ import (
5 5
 	"errors"
6 6
 	"fmt"
7 7
 	"path/filepath"
8
+	"strings"
8 9
 	"sync"
9 10
 
10 11
 	"github.com/containerd/log"
... ...
@@ -43,6 +60,7 @@ type Config struct {
43 43
 	ContainerLister ContainerLister
44 44
 }
45 45
 
46
+// NewNRI creates and starts a new NRI instance based on the provided configuration.
46 47
 func NewNRI(ctx context.Context, cfg Config) (*NRI, error) {
47 48
 	n := &NRI{cfg: cfg}
48 49
 	if !n.cfg.DaemonConfig.Enable {
... ...
@@ -71,6 +89,7 @@ func NewNRI(ctx context.Context, cfg Config) (*NRI, error) {
71 71
 	return n, nil
72 72
 }
73 73
 
74
+// Shutdown stops the NRI instance and releases its resources.
74 75
 func (n *NRI) Shutdown(ctx context.Context) {
75 76
 	n.mu.Lock()
76 77
 	defer n.mu.Unlock()
... ...
@@ -82,10 +101,76 @@ func (n *NRI) Shutdown(ctx context.Context) {
82 82
 	n.nri = nil
83 83
 }
84 84
 
85
+// CreateContainer notifies plugins of a "creation" NRI-lifecycle event for a container,
86
+// allowing the plugin to adjust settings before the container is created.
87
+func (n *NRI) CreateContainer(ctx context.Context, ctr *container.Container) error {
88
+	n.mu.RLock()
89
+	defer n.mu.RUnlock()
90
+	if n.nri == nil {
91
+		return nil
92
+	}
93
+	// ctr.State can safely be locked here, but there's no need because it's expected
94
+	// to be newly created and not yet accessible in any other thread.
95
+
96
+	nriPod, nriCtr, err := containerToNRI(ctr)
97
+	if err != nil {
98
+		return err
99
+	}
100
+
101
+	// TODO(robmry): call RunPodSandbox?
102
+
103
+	resp, err := n.nri.CreateContainer(ctx, &adaptation.CreateContainerRequest{
104
+		Pod:       nriPod,
105
+		Container: nriCtr,
106
+	})
107
+	if err != nil {
108
+		return err
109
+	}
110
+
111
+	if resp.GetUpdate() != nil {
112
+		return errors.New("container update is not supported")
113
+	}
114
+	if err := applyAdjustments(ctx, ctr, resp.GetAdjust()); err != nil {
115
+		return err
116
+	}
117
+	return nil
118
+}
119
+
120
+// syncFn is called when a plugin registers, allowing the plugin to learn the
121
+// current state of all containers.
85 122
 func (n *NRI) syncFn(ctx context.Context, syncCB adaptation.SyncCB) error {
123
+	// Claim a write lock so containers can't be created/removed until sync is done.
124
+	// The plugin will get create/remove events after the sync, so won't miss anything.
125
+	//
126
+	// If a container's state changes during the sync, the plugin may see already-modified
127
+	// state, then get a change notification with no changes.
128
+	n.mu.Lock()
129
+	defer n.mu.Unlock()
130
+
131
+	containers := n.cfg.ContainerLister.List()
132
+	nriPods := make([]*adaptation.PodSandbox, 0, len(containers))
133
+	nriCtrs := make([]*adaptation.Container, 0, len(containers))
134
+	for _, ctr := range containers {
135
+		ctr.State.Lock()
136
+		nriPod, nriCtr, err := containerToNRI(ctr)
137
+		ctr.State.Unlock()
138
+		if err != nil {
139
+			return fmt.Errorf("converting container %s to NRI: %w", ctr.ID, err)
140
+		}
141
+		nriPods = append(nriPods, nriPod)
142
+		nriCtrs = append(nriCtrs, nriCtr)
143
+	}
144
+	updates, err := syncCB(ctx, nriPods, nriCtrs)
145
+	if err != nil {
146
+		return fmt.Errorf("synchronizing NRI state: %w", err)
147
+	}
148
+	if len(updates) > 0 {
149
+		return errors.New("container updates during sync are not implemented")
150
+	}
86 151
 	return nil
87 152
 }
88 153
 
154
+// updateFn may be called asynchronously by plugins.
89 155
 func (n *NRI) updateFn(context.Context, []*adaptation.ContainerUpdate) ([]*adaptation.ContainerUpdate, error) {
90 156
 	return nil, errors.New("not implemented")
91 157
 }
... ...
@@ -128,3 +213,95 @@ func nriOptions(cfg opts.NRIOpts) []adaptation.Option {
128 128
 	}
129 129
 	return res
130 130
 }
131
+
132
+func containerToNRI(ctr *container.Container) (*adaptation.PodSandbox, *adaptation.Container, error) {
133
+	// TODO(robmry) - this implementation is incomplete, most fields are not populated.
134
+	//
135
+	// Many of these fields have straightforward mappings from Docker container fields,
136
+	// but each will need consideration and tests for both outgoing settings and
137
+	// adjustments from plugins.
138
+	//
139
+	// Docker doesn't have pods - but PodSandbox is how plugins will learn the container's
140
+	// network namespace. So, the intent is to represent each container as having its own
141
+	// PodSandbox, with the same ID and lifecycle as the container. We can probably represent
142
+	// container-networking as containers sharing a pod.
143
+	nriPod := &adaptation.PodSandbox{
144
+		Id:             ctr.ID,
145
+		Name:           ctr.Name,
146
+		Uid:            "",
147
+		Namespace:      "",
148
+		Labels:         nil,
149
+		Annotations:    nil,
150
+		RuntimeHandler: "",
151
+		Linux:          nil,
152
+		Pid:            0,
153
+		Ips:            nil,
154
+	}
155
+
156
+	nriCtr := &adaptation.Container{
157
+		Id:           ctr.ID,
158
+		PodSandboxId: ctr.ID,
159
+		Name:         ctr.Name,
160
+		State:        adaptation.ContainerState_CONTAINER_UNKNOWN,
161
+		Labels:       ctr.Config.Labels,
162
+		Annotations:  ctr.HostConfig.Annotations,
163
+		Args:         ctr.Config.Cmd,
164
+		Env:          ctr.Config.Env,
165
+		Hooks:        nil,
166
+		Linux: &adaptation.LinuxContainer{
167
+			Namespaces:     nil,
168
+			Devices:        nil,
169
+			Resources:      nil,
170
+			OomScoreAdj:    nil,
171
+			CgroupsPath:    "",
172
+			IoPriority:     nil,
173
+			SeccompProfile: nil,
174
+			SeccompPolicy:  nil,
175
+		},
176
+		Mounts:        nil,
177
+		Pid:           uint32(ctr.Pid),
178
+		Rlimits:       nil,
179
+		CreatedAt:     0,
180
+		StartedAt:     0,
181
+		FinishedAt:    0,
182
+		ExitCode:      0,
183
+		StatusReason:  "",
184
+		StatusMessage: "",
185
+		CDIDevices:    nil,
186
+	}
187
+	return nriPod, nriCtr, nil
188
+}
189
+
190
+func applyAdjustments(ctx context.Context, ctr *container.Container, adj *adaptation.ContainerAdjustment) error {
191
+	if adj == nil {
192
+		return nil
193
+	}
194
+	if err := applyEnvVars(ctx, ctr, adj.Env); err != nil {
195
+		return fmt.Errorf("applying environment variable adjustments: %w", err)
196
+	}
197
+	return nil
198
+}
199
+
200
+func applyEnvVars(ctx context.Context, ctr *container.Container, envVars []*adaptation.KeyValue) error {
201
+	if len(envVars) == 0 {
202
+		return nil
203
+	}
204
+	existing := make(map[string]int, len(ctr.Config.Env))
205
+	for i, e := range ctr.Config.Env {
206
+		k, _, _ := strings.Cut(e, "=")
207
+		existing[k] = i
208
+	}
209
+	for _, kv := range envVars {
210
+		if kv.Key == "" {
211
+			return errors.New("empty environment variable key")
212
+		}
213
+		val := kv.Key + "=" + kv.Value
214
+		log.G(ctx).Debugf("Applying NRI env var adjustment to %s", kv.Key)
215
+		if i, found := existing[kv.Key]; found {
216
+			ctr.Config.Env[i] = val
217
+		} else {
218
+			ctr.Config.Env = append(ctr.Config.Env, val)
219
+		}
220
+	}
221
+	return nil
222
+}