aboutsummaryrefslogtreecommitdiff
path: root/pkg/specgen/generate/security_linux.go
blob: aacefcbac2cad9d5535be693b6b92b71c4071c00 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
package generate

import (
	"fmt"
	"strings"

	"github.com/containers/common/libimage"
	"github.com/containers/common/pkg/apparmor"
	"github.com/containers/common/pkg/capabilities"
	"github.com/containers/common/pkg/config"
	cutil "github.com/containers/common/pkg/util"
	"github.com/containers/podman/v4/libpod"
	"github.com/containers/podman/v4/libpod/define"
	"github.com/containers/podman/v4/pkg/specgen"
	"github.com/containers/podman/v4/pkg/util"
	"github.com/opencontainers/runtime-tools/generate"
	"github.com/opencontainers/selinux/go-selinux/label"
	"github.com/sirupsen/logrus"
)

// setLabelOpts sets the label options of the SecurityConfig according to the
// input.
func setLabelOpts(s *specgen.SpecGenerator, runtime *libpod.Runtime, pidConfig specgen.Namespace, ipcConfig specgen.Namespace) error {
	if !runtime.EnableLabeling() || s.Privileged {
		s.SelinuxOpts = label.DisableSecOpt()
		return nil
	}

	var labelOpts []string
	if pidConfig.IsHost() {
		labelOpts = append(labelOpts, label.DisableSecOpt()...)
	} else if pidConfig.IsContainer() {
		ctr, err := runtime.LookupContainer(pidConfig.Value)
		if err != nil {
			return fmt.Errorf("container %q not found: %w", pidConfig.Value, err)
		}
		secopts, err := label.DupSecOpt(ctr.ProcessLabel())
		if err != nil {
			return fmt.Errorf("failed to duplicate label %q : %w", ctr.ProcessLabel(), err)
		}
		labelOpts = append(labelOpts, secopts...)
	}

	if ipcConfig.IsHost() {
		labelOpts = append(labelOpts, label.DisableSecOpt()...)
	} else if ipcConfig.IsContainer() {
		ctr, err := runtime.LookupContainer(ipcConfig.Value)
		if err != nil {
			return fmt.Errorf("container %q not found: %w", ipcConfig.Value, err)
		}
		secopts, err := label.DupSecOpt(ctr.ProcessLabel())
		if err != nil {
			return fmt.Errorf("failed to duplicate label %q : %w", ctr.ProcessLabel(), err)
		}
		labelOpts = append(labelOpts, secopts...)
	}

	s.SelinuxOpts = append(s.SelinuxOpts, labelOpts...)
	return nil
}

func setupApparmor(s *specgen.SpecGenerator, rtc *config.Config, g *generate.Generator) error {
	hasProfile := len(s.ApparmorProfile) > 0
	if !apparmor.IsEnabled() {
		if hasProfile && s.ApparmorProfile != "unconfined" {
			return fmt.Errorf("apparmor profile %q specified, but Apparmor is not enabled on this system", s.ApparmorProfile)
		}
		return nil
	}
	// If privileged and caller did not specify apparmor profiles return
	if s.Privileged && !hasProfile {
		return nil
	}
	if !hasProfile {
		s.ApparmorProfile = rtc.Containers.ApparmorProfile
	}
	if len(s.ApparmorProfile) > 0 {
		g.SetProcessApparmorProfile(s.ApparmorProfile)
	}

	return nil
}

func securityConfigureGenerator(s *specgen.SpecGenerator, g *generate.Generator, newImage *libimage.Image, rtc *config.Config) error {
	var (
		caplist []string
		err     error
	)
	// HANDLE CAPABILITIES
	// NOTE: Must happen before SECCOMP
	if s.Privileged {
		g.SetupPrivileged(true)
		caplist, err = capabilities.BoundingSet()
		if err != nil {
			return err
		}
	} else {
		mergedCaps, err := capabilities.MergeCapabilities(rtc.Containers.DefaultCapabilities, s.CapAdd, s.CapDrop)
		if err != nil {
			return err
		}
		boundingSet, err := capabilities.BoundingSet()
		if err != nil {
			return err
		}
		boundingCaps := make(map[string]interface{})
		for _, b := range boundingSet {
			boundingCaps[b] = b
		}
		for _, c := range mergedCaps {
			if _, ok := boundingCaps[c]; ok {
				caplist = append(caplist, c)
			}
		}

		privCapsRequired := []string{}

		// If the container image specifies an label with a
		// capabilities.ContainerImageLabel then split the comma separated list
		// of capabilities and record them.  This list indicates the only
		// capabilities, required to run the container.
		var capsRequiredRequested []string
		for key, val := range s.Labels {
			if cutil.StringInSlice(key, capabilities.ContainerImageLabels) {
				capsRequiredRequested = strings.Split(val, ",")
			}
		}
		if !s.Privileged && len(capsRequiredRequested) > 0 {
			// Pass capRequiredRequested in CapAdd field to normalize capabilities names
			capsRequired, err := capabilities.MergeCapabilities(nil, capsRequiredRequested, nil)
			if err != nil {
				return fmt.Errorf("capabilities requested by user or image are not valid: %q: %w", strings.Join(capsRequired, ","), err)
			}
			// Verify all capRequired are in the capList
			for _, cap := range capsRequired {
				if !cutil.StringInSlice(cap, caplist) {
					privCapsRequired = append(privCapsRequired, cap)
				}
			}
			if len(privCapsRequired) == 0 {
				caplist = capsRequired
			} else {
				logrus.Errorf("Capabilities requested by user or image are not allowed by default: %q", strings.Join(privCapsRequired, ","))
			}
		}
	}

	configSpec := g.Config
	configSpec.Process.Capabilities.Ambient = []string{}

	// Always unset the inheritable capabilities similarly to what the Linux kernel does
	// They are used only when using capabilities with uid != 0.
	configSpec.Process.Capabilities.Inheritable = []string{}
	configSpec.Process.Capabilities.Bounding = caplist

	user := strings.Split(s.User, ":")[0]

	if (user == "" && s.UserNS.NSMode != specgen.KeepID) || user == "root" || user == "0" {
		configSpec.Process.Capabilities.Effective = caplist
		configSpec.Process.Capabilities.Permitted = caplist
	} else {
		mergedCaps, err := capabilities.MergeCapabilities(nil, s.CapAdd, nil)
		if err != nil {
			return fmt.Errorf("capabilities requested by user are not valid: %q: %w", strings.Join(s.CapAdd, ","), err)
		}
		boundingSet, err := capabilities.BoundingSet()
		if err != nil {
			return err
		}
		boundingCaps := make(map[string]interface{})
		for _, b := range boundingSet {
			boundingCaps[b] = b
		}
		var userCaps []string
		for _, c := range mergedCaps {
			if _, ok := boundingCaps[c]; ok {
				userCaps = append(userCaps, c)
			}
		}
		configSpec.Process.Capabilities.Effective = userCaps
		configSpec.Process.Capabilities.Permitted = userCaps

		// Ambient capabilities were added to Linux 4.3.  Set ambient
		// capabilities only when the kernel supports them.
		if supportAmbientCapabilities() {
			configSpec.Process.Capabilities.Ambient = userCaps
			configSpec.Process.Capabilities.Inheritable = userCaps
		}
	}

	g.SetProcessNoNewPrivileges(s.NoNewPrivileges)

	if err := setupApparmor(s, rtc, g); err != nil {
		return err
	}

	// HANDLE SECCOMP
	if s.SeccompProfilePath != "unconfined" {
		seccompConfig, err := getSeccompConfig(s, configSpec, newImage)
		if err != nil {
			return err
		}
		configSpec.Linux.Seccomp = seccompConfig
	}

	// Clear default Seccomp profile from Generator for unconfined containers
	// and privileged containers which do not specify a seccomp profile.
	if s.SeccompProfilePath == "unconfined" || (s.Privileged && (s.SeccompProfilePath == "" || s.SeccompProfilePath == config.SeccompOverridePath || s.SeccompProfilePath == config.SeccompDefaultPath)) {
		configSpec.Linux.Seccomp = nil
	}

	g.SetRootReadonly(s.ReadOnlyFilesystem)

	noUseIPC := s.IpcNS.NSMode == specgen.FromContainer || s.IpcNS.NSMode == specgen.FromPod || s.IpcNS.NSMode == specgen.Host
	noUseNet := s.NetNS.NSMode == specgen.FromContainer || s.NetNS.NSMode == specgen.FromPod || s.NetNS.NSMode == specgen.Host
	noUseUTS := s.UtsNS.NSMode == specgen.FromContainer || s.UtsNS.NSMode == specgen.FromPod || s.UtsNS.NSMode == specgen.Host

	// Add default sysctls
	defaultSysctls, err := util.ValidateSysctls(rtc.Sysctls())
	if err != nil {
		return err
	}
	for sysctlKey, sysctlVal := range defaultSysctls {
		// Ignore mqueue sysctls if --ipc=host
		if noUseIPC && strings.HasPrefix(sysctlKey, "fs.mqueue.") {
			logrus.Infof("Sysctl %s=%s ignored in containers.conf, since IPC Namespace set to %q", sysctlKey, sysctlVal, s.IpcNS.NSMode)

			continue
		}

		// Ignore net sysctls if --net=host
		if noUseNet && strings.HasPrefix(sysctlKey, "net.") {
			logrus.Infof("Sysctl %s=%s ignored in containers.conf, since Network Namespace set to host", sysctlKey, sysctlVal)
			continue
		}

		// Ignore uts sysctls if --uts=host
		if noUseUTS && (strings.HasPrefix(sysctlKey, "kernel.domainname") || strings.HasPrefix(sysctlKey, "kernel.hostname")) {
			logrus.Infof("Sysctl %s=%s ignored in containers.conf, since UTS Namespace set to host", sysctlKey, sysctlVal)
			continue
		}

		g.AddLinuxSysctl(sysctlKey, sysctlVal)
	}

	for sysctlKey, sysctlVal := range s.Sysctl {
		if s.IpcNS.IsHost() && strings.HasPrefix(sysctlKey, "fs.mqueue.") {
			return fmt.Errorf("sysctl %s=%s can't be set since IPC Namespace set to host: %w", sysctlKey, sysctlVal, define.ErrInvalidArg)
		}

		// Ignore net sysctls if --net=host
		if s.NetNS.IsHost() && strings.HasPrefix(sysctlKey, "net.") {
			return fmt.Errorf("sysctl %s=%s can't be set since Network Namespace set to host: %w", sysctlKey, sysctlVal, define.ErrInvalidArg)
		}

		// Ignore uts sysctls if --uts=host
		if s.UtsNS.IsHost() && (strings.HasPrefix(sysctlKey, "kernel.domainname") || strings.HasPrefix(sysctlKey, "kernel.hostname")) {
			return fmt.Errorf("sysctl %s=%s can't be set since UTS Namespace set to host: %w", sysctlKey, sysctlVal, define.ErrInvalidArg)
		}

		g.AddLinuxSysctl(sysctlKey, sysctlVal)
	}

	return nil
}