1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
|
// +build linux darwin
package utils
import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"github.com/containers/podman/v2/pkg/cgroups"
"github.com/containers/podman/v2/pkg/rootless"
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"github.com/godbus/dbus/v5"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// RunUnderSystemdScope adds the specified pid to a systemd scope
func RunUnderSystemdScope(pid int, slice string, unitName string) error {
var properties []systemdDbus.Property
var conn *systemdDbus.Conn
var err error
if rootless.IsRootless() {
conn, err = cgroups.GetUserConnection(rootless.GetRootlessUID())
if err != nil {
return err
}
} else {
conn, err = systemdDbus.New()
if err != nil {
return err
}
}
properties = append(properties, systemdDbus.PropSlice(slice))
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
properties = append(properties, newProp("Delegate", true))
properties = append(properties, newProp("DefaultDependencies", false))
ch := make(chan string)
_, err = conn.StartTransientUnit(unitName, "replace", properties, ch)
if err != nil {
// On errors check if the cgroup already exists, if it does move the process there
if props, err := conn.GetUnitTypeProperties(unitName, "Scope"); err == nil {
if cgroup, ok := props["ControlGroup"].(string); ok && cgroup != "" {
if err := moveUnderCgroup(cgroup, "", []uint32{uint32(pid)}); err != nil {
return err
}
return nil
}
}
return err
}
defer conn.Close()
// Block until job is started
<-ch
return nil
}
func getCgroupProcess(procFile string) (string, error) {
f, err := os.Open(procFile)
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
cgroup := "/"
for scanner.Scan() {
line := scanner.Text()
parts := strings.SplitN(line, ":", 3)
if len(parts) != 3 {
return "", errors.Errorf("cannot parse cgroup line %q", line)
}
if strings.HasPrefix(line, "0::") {
cgroup = line[3:]
break
}
// root cgroup, skip it
if parts[2] == "/" {
continue
}
// The process must have the same cgroup path for all controllers
// The OCI runtime spec file allow us to specify only one path.
if cgroup != "/" && cgroup != parts[2] {
return "", errors.Errorf("cgroup configuration not supported, the process is in two different cgroups")
}
cgroup = parts[2]
}
if cgroup == "/" {
return "", errors.Errorf("could not find cgroup mount in %q", procFile)
}
return cgroup, nil
}
// GetOwnCgroup returns the cgroup for the current process.
func GetOwnCgroup() (string, error) {
return getCgroupProcess("/proc/self/cgroup")
}
// GetCgroupProcess returns the cgroup for the specified process process.
func GetCgroupProcess(pid int) (string, error) {
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid))
}
// MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
func MoveUnderCgroupSubtree(subtree string) error {
return moveUnderCgroup("", subtree, nil)
}
// moveUnderCgroup moves a group of processes to a new cgroup.
// If cgroup is the empty string, then the current calling process cgroup is used.
// If processes is empty, then the processes from the current cgroup are moved.
func moveUnderCgroup(cgroup, subtree string, processes []uint32) error {
procFile := "/proc/self/cgroup"
f, err := os.Open(procFile)
if err != nil {
return err
}
defer f.Close()
unifiedMode, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return err
}
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := scanner.Text()
parts := strings.SplitN(line, ":", 3)
if len(parts) != 3 {
return errors.Errorf("cannot parse cgroup line %q", line)
}
// root cgroup, skip it
if parts[2] == "/" {
continue
}
cgroupRoot := "/sys/fs/cgroup"
// Special case the unified mount on hybrid cgroup and named hierarchies.
// This works on Fedora 31, but we should really parse the mounts to see
// where the cgroup hierarchy is mounted.
if parts[1] == "" && !unifiedMode {
// If it is not using unified mode, the cgroup v2 hierarchy is
// usually mounted under /sys/fs/cgroup/unified
cgroupRoot = filepath.Join(cgroupRoot, "unified")
} else if parts[1] != "" {
// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
controller := strings.TrimPrefix(parts[1], "name=")
cgroupRoot = filepath.Join(cgroupRoot, controller)
}
parentCgroup := cgroup
if parentCgroup == "" {
parentCgroup = parts[2]
}
newCgroup := filepath.Join(cgroupRoot, parentCgroup, subtree)
if err := os.Mkdir(newCgroup, 0755); err != nil && !os.IsExist(err) {
return err
}
f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0755)
if err != nil {
return err
}
defer f.Close()
if len(processes) > 0 {
for _, pid := range processes {
if _, err := f.Write([]byte(fmt.Sprintf("%d\n", pid))); err != nil {
logrus.Warnf("Cannot move process %d to cgroup %q", pid, newCgroup)
}
}
} else {
processesData, err := ioutil.ReadFile(filepath.Join(cgroupRoot, parts[2], "cgroup.procs"))
if err != nil {
return err
}
for _, pid := range bytes.Split(processesData, []byte("\n")) {
if len(pid) == 0 {
continue
}
if _, err := f.Write(pid); err != nil {
logrus.Warnf("Cannot move process %s to cgroup %q", string(pid), newCgroup)
}
}
}
}
return nil
}
func newProp(name string, units interface{}) systemdDbus.Property {
return systemdDbus.Property{
Name: name,
Value: dbus.MakeVariant(units),
}
}
|