Diffstat (limited to 'libpod')
-rw-r--r--  libpod/boltdb_state.go | 176
-rw-r--r--  libpod/boltdb_state_freebsd.go | 25
-rw-r--r--  libpod/boltdb_state_internal.go | 74
-rw-r--r--  libpod/boltdb_state_linux.go | 2
-rw-r--r--  libpod/boltdb_state_unsupported.go | 19
-rw-r--r--  libpod/common/common.go | 16
-rw-r--r--  libpod/common/signing_options.go | 10
-rw-r--r--  libpod/container.go | 51
-rw-r--r--  libpod/container_api.go | 12
-rw-r--r--  libpod/container_commit.go | 4
-rw-r--r--  libpod/container_config.go | 5
-rw-r--r--  libpod/container_copy_unsupported.go | 17
-rw-r--r--  libpod/container_exec.go | 14
-rw-r--r--  libpod/container_freebsd.go | 31
-rw-r--r--  libpod/container_graph.go | 93
-rw-r--r--  libpod/container_inspect.go | 309
-rw-r--r--  libpod/container_inspect_freebsd.go | 17
-rw-r--r--  libpod/container_inspect_linux.go | 306
-rw-r--r--  libpod/container_internal.go | 149
-rw-r--r--  libpod/container_internal_common.go | 2694
-rw-r--r--  libpod/container_internal_freebsd.go | 274
-rw-r--r--  libpod/container_internal_linux.go | 3028
-rw-r--r--  libpod/container_internal_unsupported.go | 99
-rw-r--r--  libpod/container_linux.go | 15
-rw-r--r--  libpod/container_path_resolution.go | 30
-rw-r--r--  libpod/container_path_resolution_test.go | 28
-rw-r--r--  libpod/container_stat_linux.go | 2
-rw-r--r--  libpod/container_stat_unsupported.go | 14
-rw-r--r--  libpod/container_top_linux.go | 2
-rw-r--r--  libpod/container_top_unsupported.go | 14
-rw-r--r--  libpod/container_unsupported.go | 7
-rw-r--r--  libpod/container_validate.go | 8
-rw-r--r--  libpod/define/config.go | 9
-rw-r--r--  libpod/define/container_inspect.go | 6
-rw-r--r--  libpod/define/errors.go | 3
-rw-r--r--  libpod/define/exec_codes.go | 4
-rw-r--r--  libpod/define/healthchecks.go | 74
-rw-r--r--  libpod/define/mount.go | 2
-rw-r--r--  libpod/define/mount_freebsd.go | 8
-rw-r--r--  libpod/define/mount_linux.go | 8
-rw-r--r--  libpod/define/mount_unsupported.go | 8
-rw-r--r--  libpod/define/sdnotify.go | 20
-rw-r--r--  libpod/define/volume_inspect.go | 2
-rw-r--r--  libpod/events.go | 29
-rw-r--r--  libpod/events/events_freebsd.go | 23
-rw-r--r--  libpod/events/events_linux.go | 4
-rw-r--r--  libpod/events/events_unsupported.go | 4
-rw-r--r--  libpod/events/journal_linux.go | 100
-rw-r--r--  libpod/events/logfile.go | 22
-rw-r--r--  libpod/events/nullout.go | 13
-rw-r--r--  libpod/healthcheck.go | 41
-rw-r--r--  libpod/healthcheck_linux.go | 2
-rw-r--r--  libpod/healthcheck_unsupported.go | 25
-rw-r--r--  libpod/info.go | 199
-rw-r--r--  libpod/info_freebsd.go | 40
-rw-r--r--  libpod/info_linux.go | 132
-rw-r--r--  libpod/info_unsupported.go | 14
-rw-r--r--  libpod/kube.go | 39
-rw-r--r--  libpod/lock/file/file_lock.go | 11
-rw-r--r--  libpod/networking_common.go | 719
-rw-r--r--  libpod/networking_freebsd.go | 268
-rw-r--r--  libpod/networking_linux.go | 717
-rw-r--r--  libpod/networking_slirp4netns.go | 12
-rw-r--r--  libpod/networking_unsupported.go | 91
-rw-r--r--  libpod/oci.go | 4
-rw-r--r--  libpod/oci_conmon_attach_common.go | 305
-rw-r--r--  libpod/oci_conmon_attach_freebsd.go | 21
-rw-r--r--  libpod/oci_conmon_attach_linux.go | 297
-rw-r--r--  libpod/oci_conmon_common.go | 1650
-rw-r--r--  libpod/oci_conmon_exec_common.go | 771
-rw-r--r--  libpod/oci_conmon_exec_freebsd.go | 10
-rw-r--r--  libpod/oci_conmon_exec_linux.go | 768
-rw-r--r--  libpod/oci_conmon_freebsd.go | 27
-rw-r--r--  libpod/oci_conmon_linux.go | 1658
-rw-r--r--  libpod/oci_conmon_unsupported.go | 24
-rw-r--r--  libpod/oci_missing.go | 6
-rw-r--r--  libpod/options.go | 56
-rw-r--r--  libpod/plugin/volume_api.go | 59
-rw-r--r--  libpod/pod_api.go | 20
-rw-r--r--  libpod/pod_internal.go | 6
-rw-r--r--  libpod/pod_top_unsupported.go | 20
-rw-r--r--  libpod/runtime.go | 166
-rw-r--r--  libpod/runtime_cstorage.go | 8
-rw-r--r--  libpod/runtime_ctr.go | 128
-rw-r--r--  libpod/runtime_ctr_freebsd.go | 5
-rw-r--r--  libpod/runtime_ctr_linux.go | 5
-rw-r--r--  libpod/runtime_img.go | 6
-rw-r--r--  libpod/runtime_migrate.go | 2
-rw-r--r--  libpod/runtime_migrate_unsupported.go | 16
-rw-r--r--  libpod/runtime_pod_linux.go | 110
-rw-r--r--  libpod/runtime_pod_unsupported.go | 30
-rw-r--r--  libpod/runtime_renumber.go | 6
-rw-r--r--  libpod/runtime_test.go | 28
-rw-r--r--  libpod/runtime_volume_linux.go | 6
-rw-r--r--  libpod/runtime_volume_unsupported.go | 42
-rw-r--r--  libpod/stats_common.go | 49
-rw-r--r--  libpod/stats_linux.go (renamed from libpod/stats.go) | 50
-rw-r--r--  libpod/stats_unsupported.go | 17
-rw-r--r--  libpod/util.go | 2
-rw-r--r--  libpod/util_linux.go | 8
-rw-r--r--  libpod/util_unsupported.go | 27
-rw-r--r--  libpod/volume.go | 2
-rw-r--r--  libpod/volume_inspect.go | 9
-rw-r--r--  libpod/volume_internal_unsupported.go | 32
105 files changed, 9435 insertions, 7368 deletions
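
Most of the error-string changes throughout this diff apply one mechanical convention: messages wrapped with fmt.Errorf and %w drop their leading "error " prefix, because the wrapped value already prints as an error and the prefix was redundant. A minimal sketch of the before/after behavior (the path and sentinel error are illustrative, not taken from this commit):

package main

import (
	"errors"
	"fmt"
)

// errPermission stands in for a failure returned by bolt.Open.
var errPermission = errors.New("permission denied")

func openDB(path string) error {
	// Old style printed "error opening database /x: permission denied";
	// the new style drops the redundant "error " prefix.
	return fmt.Errorf("opening database %s: %w", path, errPermission)
}

func main() {
	err := openDB("/tmp/bolt_state.db")
	fmt.Println(err)                           // opening database /tmp/bolt_state.db: permission denied
	fmt.Println(errors.Is(err, errPermission)) // true: %w keeps the error chain intact
}
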
diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go
index 81f11410b..4fd95a3cf 100644
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@@ -85,7 +85,7 @@ func NewBoltState(path string, runtime *Runtime) (State, error) {
db, err := bolt.Open(path, 0600, nil)
if err != nil {
- return nil, fmt.Errorf("error opening database %s: %w", path, err)
+ return nil, fmt.Errorf("opening database %s: %w", path, err)
}
// Everywhere else, we use s.deferredCloseDBCon(db) to ensure the state's DB
// mutex is also unlocked.
@@ -123,7 +123,7 @@ func NewBoltState(path string, runtime *Runtime) (State, error) {
return nil
})
if err != nil {
- return nil, fmt.Errorf("error checking DB schema: %w", err)
+ return nil, fmt.Errorf("checking DB schema: %w", err)
}
if !needsUpdate {
@@ -135,13 +135,13 @@ func NewBoltState(path string, runtime *Runtime) (State, error) {
err = db.Update(func(tx *bolt.Tx) error {
for _, bkt := range createBuckets {
if _, err := tx.CreateBucketIfNotExists(bkt); err != nil {
- return fmt.Errorf("error creating bucket %s: %w", string(bkt), err)
+ return fmt.Errorf("creating bucket %s: %w", string(bkt), err)
}
}
return nil
})
if err != nil {
- return nil, fmt.Errorf("error creating buckets for DB: %w", err)
+ return nil, fmt.Errorf("creating buckets for DB: %w", err)
}
state.valid = true
@@ -220,11 +220,11 @@ func (s *BoltState) Refresh() error {
return nil
})
if err != nil {
- return fmt.Errorf("error reading exit codes bucket: %w", err)
+ return fmt.Errorf("reading exit codes bucket: %w", err)
}
for _, id := range toRemoveExitCodes {
if err := exitCodeBucket.Delete([]byte(id)); err != nil {
- return fmt.Errorf("error removing exit code for ID %s: %w", id, err)
+ return fmt.Errorf("removing exit code for ID %s: %w", id, err)
}
}
@@ -276,7 +276,7 @@ func (s *BoltState) Refresh() error {
state := new(podState)
if err := json.Unmarshal(stateBytes, state); err != nil {
- return fmt.Errorf("error unmarshalling state for pod %s: %w", string(id), err)
+ return fmt.Errorf("unmarshalling state for pod %s: %w", string(id), err)
}
// Clear the Cgroup path
@@ -284,11 +284,11 @@ func (s *BoltState) Refresh() error {
newStateBytes, err := json.Marshal(state)
if err != nil {
- return fmt.Errorf("error marshalling modified state for pod %s: %w", string(id), err)
+ return fmt.Errorf("marshalling modified state for pod %s: %w", string(id), err)
}
if err := podBkt.Put(stateKey, newStateBytes); err != nil {
- return fmt.Errorf("error updating state for pod %s in DB: %w", string(id), err)
+ return fmt.Errorf("updating state for pod %s in DB: %w", string(id), err)
}
// It's not a container, nothing to do
@@ -297,7 +297,7 @@ func (s *BoltState) Refresh() error {
// First, delete the network namespace
if err := ctrBkt.Delete(netNSKey); err != nil {
- return fmt.Errorf("error removing network namespace for container %s: %w", string(id), err)
+ return fmt.Errorf("removing network namespace for container %s: %w", string(id), err)
}
stateBytes := ctrBkt.Get(stateKey)
@@ -309,18 +309,18 @@ func (s *BoltState) Refresh() error {
state := new(ContainerState)
if err := json.Unmarshal(stateBytes, state); err != nil {
- return fmt.Errorf("error unmarshalling state for container %s: %w", string(id), err)
+ return fmt.Errorf("unmarshalling state for container %s: %w", string(id), err)
}
resetState(state)
newStateBytes, err := json.Marshal(state)
if err != nil {
- return fmt.Errorf("error marshalling modified state for container %s: %w", string(id), err)
+ return fmt.Errorf("marshalling modified state for container %s: %w", string(id), err)
}
if err := ctrBkt.Put(stateKey, newStateBytes); err != nil {
- return fmt.Errorf("error updating state for container %s in DB: %w", string(id), err)
+ return fmt.Errorf("updating state for container %s in DB: %w", string(id), err)
}
// Delete all exec sessions, if there are any
@@ -338,7 +338,7 @@ func (s *BoltState) Refresh() error {
}
for _, execID := range toRemove {
if err := ctrExecBkt.Delete([]byte(execID)); err != nil {
- return fmt.Errorf("error removing exec session %s from container %s: %w", execID, string(id), err)
+ return fmt.Errorf("removing exec session %s from container %s: %w", execID, string(id), err)
}
}
}
@@ -358,12 +358,12 @@ func (s *BoltState) Refresh() error {
if testID := namesBucket.Get(name); testID != nil {
logrus.Infof("Found dangling name %s (ID %s) in database", string(name), id)
if err := namesBucket.Delete(name); err != nil {
- return fmt.Errorf("error removing dangling name %s (ID %s) from database: %w", string(name), id, err)
+ return fmt.Errorf("removing dangling name %s (ID %s) from database: %w", string(name), id, err)
}
}
}
if err := idBucket.Delete([]byte(id)); err != nil {
- return fmt.Errorf("error removing dangling ID %s from database: %w", id, err)
+ return fmt.Errorf("removing dangling ID %s from database: %w", id, err)
}
}
@@ -384,7 +384,7 @@ func (s *BoltState) Refresh() error {
oldState := new(VolumeState)
if err := json.Unmarshal(volStateBytes, oldState); err != nil {
- return fmt.Errorf("error unmarshalling state for volume %s: %w", string(id), err)
+ return fmt.Errorf("unmarshalling state for volume %s: %w", string(id), err)
}
// Reset mount count to 0
@@ -393,11 +393,11 @@ func (s *BoltState) Refresh() error {
newState, err := json.Marshal(oldState)
if err != nil {
- return fmt.Errorf("error marshalling state for volume %s: %w", string(id), err)
+ return fmt.Errorf("marshalling state for volume %s: %w", string(id), err)
}
if err := dbVol.Put(stateKey, newState); err != nil {
- return fmt.Errorf("error storing new state for volume %s: %w", string(id), err)
+ return fmt.Errorf("storing new state for volume %s: %w", string(id), err)
}
return nil
@@ -421,7 +421,7 @@ func (s *BoltState) Refresh() error {
for _, execSession := range toRemoveExec {
if err := execBucket.Delete([]byte(execSession)); err != nil {
- return fmt.Errorf("error deleting exec session %s registry from database: %w", execSession, err)
+ return fmt.Errorf("deleting exec session %s registry from database: %w", execSession, err)
}
}
@@ -841,7 +841,7 @@ func (s *BoltState) UpdateContainer(ctr *Container) error {
}
if err := json.Unmarshal(newStateBytes, newState); err != nil {
- return fmt.Errorf("error unmarshalling container %s state: %w", ctr.ID(), err)
+ return fmt.Errorf("unmarshalling container %s state: %w", ctr.ID(), err)
}
netNSBytes := ctrToUpdate.Get(netNSKey)
@@ -886,7 +886,7 @@ func (s *BoltState) SaveContainer(ctr *Container) error {
stateJSON, err := json.Marshal(ctr.state)
if err != nil {
- return fmt.Errorf("error marshalling container %s state to JSON: %w", ctr.ID(), err)
+ return fmt.Errorf("marshalling container %s state to JSON: %w", ctr.ID(), err)
}
netNSPath := getNetNSPath(ctr)
@@ -912,17 +912,17 @@ func (s *BoltState) SaveContainer(ctr *Container) error {
// Update the state
if err := ctrToSave.Put(stateKey, stateJSON); err != nil {
- return fmt.Errorf("error updating container %s state in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("updating container %s state in DB: %w", ctr.ID(), err)
}
if netNSPath != "" {
if err := ctrToSave.Put(netNSKey, []byte(netNSPath)); err != nil {
- return fmt.Errorf("error updating network namespace path for container %s in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("updating network namespace path for container %s in DB: %w", ctr.ID(), err)
}
} else {
// Delete the existing network namespace
if err := ctrToSave.Delete(netNSKey); err != nil {
- return fmt.Errorf("error removing network namespace path for container %s in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("removing network namespace path for container %s in DB: %w", ctr.ID(), err)
}
}
@@ -1142,7 +1142,7 @@ func (s *BoltState) GetNetworks(ctr *Container) (map[string]types.PerNetworkOpti
if ctrNetworkBkt == nil {
ctrNetworkBkt, err = dbCtr.CreateBucket(networksBkt)
if err != nil {
- return fmt.Errorf("error creating networks bucket for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("creating networks bucket for container %s: %w", ctr.ID(), err)
}
// the container has no networks in the db lookup config and write to the db
networkList = ctr.config.NetworksDeprecated
@@ -1249,7 +1249,7 @@ func (s *BoltState) NetworkConnect(ctr *Container, network string, opts types.Pe
optBytes, err := json.Marshal(opts)
if err != nil {
- return fmt.Errorf("error marshalling network options JSON for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("marshalling network options JSON for container %s: %w", ctr.ID(), err)
}
ctrID := []byte(ctr.ID())
@@ -1278,12 +1278,12 @@ func (s *BoltState) NetworkConnect(ctr *Container, network string, opts types.Pe
}
netConnected := ctrNetworksBkt.Get([]byte(network))
if netConnected != nil {
- return fmt.Errorf("container %s is already connected to network %q: %w", ctr.ID(), network, define.ErrNetworkExists)
+ return fmt.Errorf("container %s is already connected to network %q: %w", ctr.ID(), network, define.ErrNetworkConnected)
}
// Add the network
if err := ctrNetworksBkt.Put([]byte(network), optBytes); err != nil {
- return fmt.Errorf("error adding container %s to network %s in DB: %w", ctr.ID(), network, err)
+ return fmt.Errorf("adding container %s to network %s in DB: %w", ctr.ID(), network, err)
}
return nil
@@ -1340,7 +1340,7 @@ func (s *BoltState) NetworkDisconnect(ctr *Container, network string) error {
}
if err := ctrNetworksBkt.Delete([]byte(network)); err != nil {
- return fmt.Errorf("error removing container %s from network %s: %w", ctr.ID(), network, err)
+ return fmt.Errorf("removing container %s from network %s: %w", ctr.ID(), network, err)
}
if ctrAliasesBkt != nil {
@@ -1350,7 +1350,7 @@ func (s *BoltState) NetworkDisconnect(ctr *Container, network string) error {
}
if err := ctrAliasesBkt.DeleteBucket([]byte(network)); err != nil {
- return fmt.Errorf("error removing container %s network aliases for network %s: %w", ctr.ID(), network, err)
+ return fmt.Errorf("removing container %s network aliases for network %s: %w", ctr.ID(), network, err)
}
}
@@ -1626,7 +1626,7 @@ func (s *BoltState) AddExecSession(ctr *Container, session *ExecSession) error {
ctrExecSessionBucket, err := dbCtr.CreateBucketIfNotExists(execBkt)
if err != nil {
- return fmt.Errorf("error creating exec sessions bucket for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("creating exec sessions bucket for container %s: %w", ctr.ID(), err)
}
execExists := execBucket.Get(sessionID)
@@ -1635,11 +1635,11 @@ func (s *BoltState) AddExecSession(ctr *Container, session *ExecSession) error {
}
if err := execBucket.Put(sessionID, ctrID); err != nil {
- return fmt.Errorf("error adding exec session %s to DB: %w", session.ID(), err)
+ return fmt.Errorf("adding exec session %s to DB: %w", session.ID(), err)
}
if err := ctrExecSessionBucket.Put(sessionID, ctrID); err != nil {
- return fmt.Errorf("error adding exec session %s to container %s in DB: %w", session.ID(), ctr.ID(), err)
+ return fmt.Errorf("adding exec session %s to container %s in DB: %w", session.ID(), ctr.ID(), err)
}
return nil
@@ -1716,7 +1716,7 @@ func (s *BoltState) RemoveExecSession(session *ExecSession) error {
}
if err := execBucket.Delete(sessionID); err != nil {
- return fmt.Errorf("error removing exec session %s from database: %w", session.ID(), err)
+ return fmt.Errorf("removing exec session %s from database: %w", session.ID(), err)
}
dbCtr := ctrBucket.Bucket(containerID)
@@ -1739,7 +1739,7 @@ func (s *BoltState) RemoveExecSession(session *ExecSession) error {
ctrSessionExists := ctrExecBucket.Get(sessionID)
if ctrSessionExists != nil {
if err := ctrExecBucket.Delete(sessionID); err != nil {
- return fmt.Errorf("error removing exec session %s from container %s in database: %w", session.ID(), session.ContainerID(), err)
+ return fmt.Errorf("removing exec session %s from container %s in database: %w", session.ID(), session.ContainerID(), err)
}
}
@@ -1847,7 +1847,7 @@ func (s *BoltState) RemoveContainerExecSessions(ctr *Container) error {
for _, session := range sessions {
if err := ctrExecSessions.Delete([]byte(session)); err != nil {
- return fmt.Errorf("error removing container %s exec session %s from database: %w", ctr.ID(), session, err)
+ return fmt.Errorf("removing container %s exec session %s from database: %w", ctr.ID(), session, err)
}
// Check if the session exists in the global table
// before removing. It should, but in cases where the DB
@@ -1861,7 +1861,7 @@ func (s *BoltState) RemoveContainerExecSessions(ctr *Container) error {
return fmt.Errorf("database mismatch: exec session %s is associated with containers %s and %s: %w", session, ctr.ID(), string(sessionExists), define.ErrInternal)
}
if err := execBucket.Delete([]byte(session)); err != nil {
- return fmt.Errorf("error removing container %s exec session %s from exec sessions: %w", ctr.ID(), session, err)
+ return fmt.Errorf("removing container %s exec session %s from exec sessions: %w", ctr.ID(), session, err)
}
}
@@ -1884,7 +1884,7 @@ func (s *BoltState) RewriteContainerConfig(ctr *Container, newCfg *ContainerConf
newCfgJSON, err := json.Marshal(newCfg)
if err != nil {
- return fmt.Errorf("error marshalling new configuration JSON for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("marshalling new configuration JSON for container %s: %w", ctr.ID(), err)
}
db, err := s.getDBCon()
@@ -1906,7 +1906,7 @@ func (s *BoltState) RewriteContainerConfig(ctr *Container, newCfg *ContainerConf
}
if err := ctrDB.Put(configKey, newCfgJSON); err != nil {
- return fmt.Errorf("error updating container %s config JSON: %w", ctr.ID(), err)
+ return fmt.Errorf("updating container %s config JSON: %w", ctr.ID(), err)
}
return nil
@@ -1937,7 +1937,7 @@ func (s *BoltState) SafeRewriteContainerConfig(ctr *Container, oldName, newName
newCfgJSON, err := json.Marshal(newCfg)
if err != nil {
- return fmt.Errorf("error marshalling new configuration JSON for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("marshalling new configuration JSON for container %s: %w", ctr.ID(), err)
}
db, err := s.getDBCon()
@@ -1978,16 +1978,16 @@ func (s *BoltState) SafeRewriteContainerConfig(ctr *Container, oldName, newName
// buckets are ID-indexed so we just need to
// overwrite the values there.
if err := namesBkt.Delete([]byte(oldName)); err != nil {
- return fmt.Errorf("error deleting container %s old name from DB for rename: %w", ctr.ID(), err)
+ return fmt.Errorf("deleting container %s old name from DB for rename: %w", ctr.ID(), err)
}
if err := idBkt.Put([]byte(ctr.ID()), []byte(newName)); err != nil {
- return fmt.Errorf("error renaming container %s in ID bucket in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("renaming container %s in ID bucket in DB: %w", ctr.ID(), err)
}
if err := namesBkt.Put([]byte(newName), []byte(ctr.ID())); err != nil {
- return fmt.Errorf("error adding new name %s for container %s in DB: %w", newName, ctr.ID(), err)
+ return fmt.Errorf("adding new name %s for container %s in DB: %w", newName, ctr.ID(), err)
}
if err := allCtrsBkt.Put([]byte(ctr.ID()), []byte(newName)); err != nil {
- return fmt.Errorf("error renaming container %s in all containers bucket in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("renaming container %s in all containers bucket in DB: %w", ctr.ID(), err)
}
if ctr.config.Pod != "" {
podsBkt, err := getPodBucket(tx)
@@ -2003,7 +2003,7 @@ func (s *BoltState) SafeRewriteContainerConfig(ctr *Container, oldName, newName
return fmt.Errorf("pod %s does not have a containers bucket: %w", ctr.config.Pod, define.ErrInternal)
}
if err := podCtrBkt.Put([]byte(ctr.ID()), []byte(newName)); err != nil {
- return fmt.Errorf("error renaming container %s in pod %s members bucket: %w", ctr.ID(), ctr.config.Pod, err)
+ return fmt.Errorf("renaming container %s in pod %s members bucket: %w", ctr.ID(), ctr.config.Pod, err)
}
}
}
@@ -2021,7 +2021,7 @@ func (s *BoltState) SafeRewriteContainerConfig(ctr *Container, oldName, newName
}
if err := ctrDB.Put(configKey, newCfgJSON); err != nil {
- return fmt.Errorf("error updating container %s config JSON: %w", ctr.ID(), err)
+ return fmt.Errorf("updating container %s config JSON: %w", ctr.ID(), err)
}
return nil
@@ -2043,7 +2043,7 @@ func (s *BoltState) RewritePodConfig(pod *Pod, newCfg *PodConfig) error {
newCfgJSON, err := json.Marshal(newCfg)
if err != nil {
- return fmt.Errorf("error marshalling new configuration JSON for pod %s: %w", pod.ID(), err)
+ return fmt.Errorf("marshalling new configuration JSON for pod %s: %w", pod.ID(), err)
}
db, err := s.getDBCon()
@@ -2065,7 +2065,7 @@ func (s *BoltState) RewritePodConfig(pod *Pod, newCfg *PodConfig) error {
}
if err := podDB.Put(configKey, newCfgJSON); err != nil {
- return fmt.Errorf("error updating pod %s config JSON: %w", pod.ID(), err)
+ return fmt.Errorf("updating pod %s config JSON: %w", pod.ID(), err)
}
return nil
@@ -2087,7 +2087,7 @@ func (s *BoltState) RewriteVolumeConfig(volume *Volume, newCfg *VolumeConfig) er
newCfgJSON, err := json.Marshal(newCfg)
if err != nil {
- return fmt.Errorf("error marshalling new configuration JSON for volume %q: %w", volume.Name(), err)
+ return fmt.Errorf("marshalling new configuration JSON for volume %q: %w", volume.Name(), err)
}
db, err := s.getDBCon()
@@ -2109,7 +2109,7 @@ func (s *BoltState) RewriteVolumeConfig(volume *Volume, newCfg *VolumeConfig) er
}
if err := volDB.Put(configKey, newCfgJSON); err != nil {
- return fmt.Errorf("error updating volume %q config JSON: %w", volume.Name(), err)
+ return fmt.Errorf("updating volume %q config JSON: %w", volume.Name(), err)
}
return nil
@@ -2522,7 +2522,7 @@ func (s *BoltState) AddVolume(volume *Volume) error {
volConfigJSON, err := json.Marshal(volume.config)
if err != nil {
- return fmt.Errorf("error marshalling volume %s config to JSON: %w", volume.Name(), err)
+ return fmt.Errorf("marshalling volume %s config to JSON: %w", volume.Name(), err)
}
// Volume state is allowed to not exist
@@ -2530,7 +2530,7 @@ func (s *BoltState) AddVolume(volume *Volume) error {
if volume.state != nil {
volStateJSON, err = json.Marshal(volume.state)
if err != nil {
- return fmt.Errorf("error marshalling volume %s state to JSON: %w", volume.Name(), err)
+ return fmt.Errorf("marshalling volume %s state to JSON: %w", volume.Name(), err)
}
}
@@ -2561,27 +2561,27 @@ func (s *BoltState) AddVolume(volume *Volume) error {
// Make a bucket for it
newVol, err := volBkt.CreateBucket(volName)
if err != nil {
- return fmt.Errorf("error creating bucket for volume %s: %w", volume.Name(), err)
+ return fmt.Errorf("creating bucket for volume %s: %w", volume.Name(), err)
}
// Make a subbucket for the containers using the volume. Dependent container IDs will be added/removed to
// this bucket in addContainer/removeContainer
if _, err := newVol.CreateBucket(volDependenciesBkt); err != nil {
- return fmt.Errorf("error creating bucket for containers using volume %s: %w", volume.Name(), err)
+ return fmt.Errorf("creating bucket for containers using volume %s: %w", volume.Name(), err)
}
if err := newVol.Put(configKey, volConfigJSON); err != nil {
- return fmt.Errorf("error storing volume %s configuration in DB: %w", volume.Name(), err)
+ return fmt.Errorf("storing volume %s configuration in DB: %w", volume.Name(), err)
}
if volStateJSON != nil {
if err := newVol.Put(stateKey, volStateJSON); err != nil {
- return fmt.Errorf("error storing volume %s state in DB: %w", volume.Name(), err)
+ return fmt.Errorf("storing volume %s state in DB: %w", volume.Name(), err)
}
}
if err := allVolsBkt.Put(volName, volName); err != nil {
- return fmt.Errorf("error storing volume %s in all volumes bucket in DB: %w", volume.Name(), err)
+ return fmt.Errorf("storing volume %s in all volumes bucket in DB: %w", volume.Name(), err)
}
return nil
@@ -2650,7 +2650,7 @@ func (s *BoltState) RemoveVolume(volume *Volume) error {
return nil
})
if err != nil {
- return fmt.Errorf("error getting list of dependencies from dependencies bucket for volumes %q: %w", volume.Name(), err)
+ return fmt.Errorf("getting list of dependencies from dependencies bucket for volumes %q: %w", volume.Name(), err)
}
if len(deps) > 0 {
return fmt.Errorf("volume %s is being used by container(s) %s: %w", volume.Name(), strings.Join(deps, ","), define.ErrVolumeBeingUsed)
@@ -2660,10 +2660,10 @@ func (s *BoltState) RemoveVolume(volume *Volume) error {
// volume is ready for removal
// Let's kick it out
if err := allVolsBkt.Delete(volName); err != nil {
- return fmt.Errorf("error removing volume %s from all volumes bucket in DB: %w", volume.Name(), err)
+ return fmt.Errorf("removing volume %s from all volumes bucket in DB: %w", volume.Name(), err)
}
if err := volBkt.DeleteBucket(volName); err != nil {
- return fmt.Errorf("error removing volume %s from DB: %w", volume.Name(), err)
+ return fmt.Errorf("removing volume %s from DB: %w", volume.Name(), err)
}
return nil
@@ -2710,7 +2710,7 @@ func (s *BoltState) UpdateVolume(volume *Volume) error {
}
if err := json.Unmarshal(stateBytes, newState); err != nil {
- return fmt.Errorf("error unmarshalling volume %s state: %w", volume.Name(), err)
+ return fmt.Errorf("unmarshalling volume %s state: %w", volume.Name(), err)
}
return nil
@@ -2740,7 +2740,7 @@ func (s *BoltState) SaveVolume(volume *Volume) error {
if volume.state != nil {
stateJSON, err := json.Marshal(volume.state)
if err != nil {
- return fmt.Errorf("error marshalling volume %s state to JSON: %w", volume.Name(), err)
+ return fmt.Errorf("marshalling volume %s state to JSON: %w", volume.Name(), err)
}
newStateJSON = stateJSON
}
@@ -3060,12 +3060,12 @@ func (s *BoltState) AddPod(pod *Pod) error {
podConfigJSON, err := json.Marshal(pod.config)
if err != nil {
- return fmt.Errorf("error marshalling pod %s config to JSON: %w", pod.ID(), err)
+ return fmt.Errorf("marshalling pod %s config to JSON: %w", pod.ID(), err)
}
podStateJSON, err := json.Marshal(pod.state)
if err != nil {
- return fmt.Errorf("error marshalling pod %s state to JSON: %w", pod.ID(), err)
+ return fmt.Errorf("marshalling pod %s state to JSON: %w", pod.ID(), err)
}
db, err := s.getDBCon()
@@ -3122,40 +3122,40 @@ func (s *BoltState) AddPod(pod *Pod) error {
// Make a bucket for it
newPod, err := podBkt.CreateBucket(podID)
if err != nil {
- return fmt.Errorf("error creating bucket for pod %s: %w", pod.ID(), err)
+ return fmt.Errorf("creating bucket for pod %s: %w", pod.ID(), err)
}
// Make a subbucket for pod containers
if _, err := newPod.CreateBucket(containersBkt); err != nil {
- return fmt.Errorf("error creating bucket for pod %s containers: %w", pod.ID(), err)
+ return fmt.Errorf("creating bucket for pod %s containers: %w", pod.ID(), err)
}
if err := newPod.Put(configKey, podConfigJSON); err != nil {
- return fmt.Errorf("error storing pod %s configuration in DB: %w", pod.ID(), err)
+ return fmt.Errorf("storing pod %s configuration in DB: %w", pod.ID(), err)
}
if err := newPod.Put(stateKey, podStateJSON); err != nil {
- return fmt.Errorf("error storing pod %s state JSON in DB: %w", pod.ID(), err)
+ return fmt.Errorf("storing pod %s state JSON in DB: %w", pod.ID(), err)
}
if podNamespace != nil {
if err := newPod.Put(namespaceKey, podNamespace); err != nil {
- return fmt.Errorf("error storing pod %s namespace in DB: %w", pod.ID(), err)
+ return fmt.Errorf("storing pod %s namespace in DB: %w", pod.ID(), err)
}
if err := nsBkt.Put(podID, podNamespace); err != nil {
- return fmt.Errorf("error storing pod %s namespace in DB: %w", pod.ID(), err)
+ return fmt.Errorf("storing pod %s namespace in DB: %w", pod.ID(), err)
}
}
// Add us to the ID and names buckets
if err := idsBkt.Put(podID, podName); err != nil {
- return fmt.Errorf("error storing pod %s ID in DB: %w", pod.ID(), err)
+ return fmt.Errorf("storing pod %s ID in DB: %w", pod.ID(), err)
}
if err := namesBkt.Put(podName, podID); err != nil {
- return fmt.Errorf("error storing pod %s name in DB: %w", pod.Name(), err)
+ return fmt.Errorf("storing pod %s name in DB: %w", pod.Name(), err)
}
if err := allPodsBkt.Put(podID, podName); err != nil {
- return fmt.Errorf("error storing pod %s in all pods bucket in DB: %w", pod.ID(), err)
+ return fmt.Errorf("storing pod %s in all pods bucket in DB: %w", pod.ID(), err)
}
return nil
@@ -3240,19 +3240,19 @@ func (s *BoltState) RemovePod(pod *Pod) error {
// Pod is empty, and ready for removal
// Let's kick it out
if err := idsBkt.Delete(podID); err != nil {
- return fmt.Errorf("error removing pod %s ID from DB: %w", pod.ID(), err)
+ return fmt.Errorf("removing pod %s ID from DB: %w", pod.ID(), err)
}
if err := namesBkt.Delete(podName); err != nil {
- return fmt.Errorf("error removing pod %s name (%s) from DB: %w", pod.ID(), pod.Name(), err)
+ return fmt.Errorf("removing pod %s name (%s) from DB: %w", pod.ID(), pod.Name(), err)
}
if err := nsBkt.Delete(podID); err != nil {
- return fmt.Errorf("error removing pod %s namespace from DB: %w", pod.ID(), err)
+ return fmt.Errorf("removing pod %s namespace from DB: %w", pod.ID(), err)
}
if err := allPodsBkt.Delete(podID); err != nil {
- return fmt.Errorf("error removing pod %s ID from all pods bucket in DB: %w", pod.ID(), err)
+ return fmt.Errorf("removing pod %s ID from all pods bucket in DB: %w", pod.ID(), err)
}
if err := podBkt.DeleteBucket(podID); err != nil {
- return fmt.Errorf("error removing pod %s from DB: %w", pod.ID(), err)
+ return fmt.Errorf("removing pod %s from DB: %w", pod.ID(), err)
}
return nil
@@ -3353,19 +3353,19 @@ func (s *BoltState) RemovePodContainers(pod *Pod) error {
// Dependencies are set, we're clear to remove
if err := ctrBkt.DeleteBucket(id); err != nil {
- return fmt.Errorf("error deleting container %s from DB: %w", string(id), define.ErrInternal)
+ return fmt.Errorf("deleting container %s from DB: %w", string(id), define.ErrInternal)
}
if err := idsBkt.Delete(id); err != nil {
- return fmt.Errorf("error deleting container %s ID in DB: %w", string(id), err)
+ return fmt.Errorf("deleting container %s ID in DB: %w", string(id), err)
}
if err := namesBkt.Delete(name); err != nil {
- return fmt.Errorf("error deleting container %s name in DB: %w", string(id), err)
+ return fmt.Errorf("deleting container %s name in DB: %w", string(id), err)
}
if err := allCtrsBkt.Delete(id); err != nil {
- return fmt.Errorf("error deleting container %s ID from all containers bucket in DB: %w", string(id), err)
+ return fmt.Errorf("deleting container %s ID from all containers bucket in DB: %w", string(id), err)
}
return nil
@@ -3376,10 +3376,10 @@ func (s *BoltState) RemovePodContainers(pod *Pod) error {
// Delete and recreate the bucket to empty it
if err := podDB.DeleteBucket(containersBkt); err != nil {
- return fmt.Errorf("error removing pod %s containers bucket: %w", pod.ID(), err)
+ return fmt.Errorf("removing pod %s containers bucket: %w", pod.ID(), err)
}
if _, err := podDB.CreateBucket(containersBkt); err != nil {
- return fmt.Errorf("error recreating pod %s containers bucket: %w", pod.ID(), err)
+ return fmt.Errorf("recreating pod %s containers bucket: %w", pod.ID(), err)
}
return nil
@@ -3496,7 +3496,7 @@ func (s *BoltState) UpdatePod(pod *Pod) error {
}
if err := json.Unmarshal(podStateBytes, newState); err != nil {
- return fmt.Errorf("error unmarshalling pod %s state JSON: %w", pod.ID(), err)
+ return fmt.Errorf("unmarshalling pod %s state JSON: %w", pod.ID(), err)
}
return nil
@@ -3526,7 +3526,7 @@ func (s *BoltState) SavePod(pod *Pod) error {
stateJSON, err := json.Marshal(pod.state)
if err != nil {
- return fmt.Errorf("error marshalling pod %s state to JSON: %w", pod.ID(), err)
+ return fmt.Errorf("marshalling pod %s state to JSON: %w", pod.ID(), err)
}
db, err := s.getDBCon()
@@ -3551,7 +3551,7 @@ func (s *BoltState) SavePod(pod *Pod) error {
// Set the pod state JSON
if err := podDB.Put(stateKey, stateJSON); err != nil {
- return fmt.Errorf("error updating pod %s state in database: %w", pod.ID(), err)
+ return fmt.Errorf("updating pod %s state in database: %w", pod.ID(), err)
}
return nil
diff --git a/libpod/boltdb_state_freebsd.go b/libpod/boltdb_state_freebsd.go
new file mode 100644
index 000000000..d0a2d4f28
--- /dev/null
+++ b/libpod/boltdb_state_freebsd.go
@@ -0,0 +1,25 @@
+//go:build freebsd
+// +build freebsd
+
+package libpod
+
+// replaceNetNS handles network namespace transitions after updating a
+// container's state.
+func replaceNetNS(netNSPath string, ctr *Container, newState *ContainerState) error {
+ if netNSPath != "" {
+ // On FreeBSD, we just record the network jail's name in our state.
+ newState.NetNS = &jailNetNS{Name: netNSPath}
+ } else {
+ newState.NetNS = nil
+ }
+ return nil
+}
+
+// getNetNSPath retrieves the netns path to be stored in the database
+func getNetNSPath(ctr *Container) string {
+ if ctr.state.NetNS != nil {
+ return ctr.state.NetNS.Name
+ } else {
+ return ""
+ }
+}
diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go
index f28fadfa9..87f1fa4eb 100644
--- a/libpod/boltdb_state_internal.go
+++ b/libpod/boltdb_state_internal.go
@@ -195,7 +195,7 @@ func checkRuntimeConfig(db *bolt.DB, rt *Runtime) error {
}
if err := configBkt.Put(missing.key, dbValue); err != nil {
- return fmt.Errorf("error updating %s in DB runtime config: %w", missing.name, err)
+ return fmt.Errorf("updating %s in DB runtime config: %w", missing.name, err)
}
}
@@ -254,7 +254,7 @@ func (s *BoltState) getDBCon() (*bolt.DB, error) {
db, err := bolt.Open(s.dbPath, 0600, nil)
if err != nil {
- return nil, fmt.Errorf("error opening database %s: %w", s.dbPath, err)
+ return nil, fmt.Errorf("opening database %s: %w", s.dbPath, err)
}
return db, nil
@@ -403,7 +403,7 @@ func (s *BoltState) getContainerConfigFromDB(id []byte, config *ContainerConfig,
}
if err := json.Unmarshal(configBytes, config); err != nil {
- return fmt.Errorf("error unmarshalling container %s config: %w", string(id), err)
+ return fmt.Errorf("unmarshalling container %s config: %w", string(id), err)
}
// convert ports to the new format if needed
@@ -426,7 +426,7 @@ func (s *BoltState) getContainerFromDB(id []byte, ctr *Container, ctrsBkt *bolt.
// Get the lock
lock, err := s.runtime.lockManager.RetrieveLock(ctr.config.LockID)
if err != nil {
- return fmt.Errorf("error retrieving lock for container %s: %w", string(id), err)
+ return fmt.Errorf("retrieving lock for container %s: %w", string(id), err)
}
ctr.lock = lock
@@ -489,13 +489,13 @@ func (s *BoltState) getPodFromDB(id []byte, pod *Pod, podBkt *bolt.Bucket) error
}
if err := json.Unmarshal(podConfigBytes, pod.config); err != nil {
- return fmt.Errorf("error unmarshalling pod %s config from DB: %w", string(id), err)
+ return fmt.Errorf("unmarshalling pod %s config from DB: %w", string(id), err)
}
// Get the lock
lock, err := s.runtime.lockManager.RetrieveLock(pod.config.LockID)
if err != nil {
- return fmt.Errorf("error retrieving lock for pod %s: %w", string(id), err)
+ return fmt.Errorf("retrieving lock for pod %s: %w", string(id), err)
}
pod.lock = lock
@@ -517,14 +517,14 @@ func (s *BoltState) getVolumeFromDB(name []byte, volume *Volume, volBkt *bolt.Bu
}
if err := json.Unmarshal(volConfigBytes, volume.config); err != nil {
- return fmt.Errorf("error unmarshalling volume %s config from DB: %w", string(name), err)
+ return fmt.Errorf("unmarshalling volume %s config from DB: %w", string(name), err)
}
// Volume state is allowed to be nil for legacy compatibility
volStateBytes := volDB.Get(stateKey)
if volStateBytes != nil {
if err := json.Unmarshal(volStateBytes, volume.state); err != nil {
- return fmt.Errorf("error unmarshalling volume %s state from DB: %w", string(name), err)
+ return fmt.Errorf("unmarshalling volume %s state from DB: %w", string(name), err)
}
}
@@ -546,7 +546,7 @@ func (s *BoltState) getVolumeFromDB(name []byte, volume *Volume, volBkt *bolt.Bu
// Get the lock
lock, err := s.runtime.lockManager.RetrieveLock(volume.config.LockID)
if err != nil {
- return fmt.Errorf("error retrieving lock for volume %q: %w", string(name), err)
+ return fmt.Errorf("retrieving lock for volume %q: %w", string(name), err)
}
volume.lock = lock
@@ -572,11 +572,11 @@ func (s *BoltState) addContainer(ctr *Container, pod *Pod) error {
// JSON container structs to insert into DB
configJSON, err := json.Marshal(ctr.config)
if err != nil {
- return fmt.Errorf("error marshalling container %s config to JSON: %w", ctr.ID(), err)
+ return fmt.Errorf("marshalling container %s config to JSON: %w", ctr.ID(), err)
}
stateJSON, err := json.Marshal(ctr.state)
if err != nil {
- return fmt.Errorf("error marshalling container %s state to JSON: %w", ctr.ID(), err)
+ return fmt.Errorf("marshalling container %s state to JSON: %w", ctr.ID(), err)
}
netNSPath := getNetNSPath(ctr)
dependsCtrs := ctr.Dependencies()
@@ -603,7 +603,7 @@ func (s *BoltState) addContainer(ctr *Container, pod *Pod) error {
opts.Aliases = append(opts.Aliases, ctr.config.ID[:12])
optBytes, err := json.Marshal(opts)
if err != nil {
- return fmt.Errorf("error marshalling network options JSON for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("marshalling network options JSON for container %s: %w", ctr.ID(), err)
}
networks[net] = optBytes
}
@@ -694,60 +694,60 @@ func (s *BoltState) addContainer(ctr *Container, pod *Pod) error {
// No overlapping containers
// Add the new container to the DB
if err := idsBucket.Put(ctrID, ctrName); err != nil {
- return fmt.Errorf("error adding container %s ID to DB: %w", ctr.ID(), err)
+ return fmt.Errorf("adding container %s ID to DB: %w", ctr.ID(), err)
}
if err := namesBucket.Put(ctrName, ctrID); err != nil {
- return fmt.Errorf("error adding container %s name (%s) to DB: %w", ctr.ID(), ctr.Name(), err)
+ return fmt.Errorf("adding container %s name (%s) to DB: %w", ctr.ID(), ctr.Name(), err)
}
if ctrNamespace != nil {
if err := nsBucket.Put(ctrID, ctrNamespace); err != nil {
- return fmt.Errorf("error adding container %s namespace (%q) to DB: %w", ctr.ID(), ctr.Namespace(), err)
+ return fmt.Errorf("adding container %s namespace (%q) to DB: %w", ctr.ID(), ctr.Namespace(), err)
}
}
if err := allCtrsBucket.Put(ctrID, ctrName); err != nil {
- return fmt.Errorf("error adding container %s to all containers bucket in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("adding container %s to all containers bucket in DB: %w", ctr.ID(), err)
}
newCtrBkt, err := ctrBucket.CreateBucket(ctrID)
if err != nil {
- return fmt.Errorf("error adding container %s bucket to DB: %w", ctr.ID(), err)
+ return fmt.Errorf("adding container %s bucket to DB: %w", ctr.ID(), err)
}
if err := newCtrBkt.Put(configKey, configJSON); err != nil {
- return fmt.Errorf("error adding container %s config to DB: %w", ctr.ID(), err)
+ return fmt.Errorf("adding container %s config to DB: %w", ctr.ID(), err)
}
if err := newCtrBkt.Put(stateKey, stateJSON); err != nil {
- return fmt.Errorf("error adding container %s state to DB: %w", ctr.ID(), err)
+ return fmt.Errorf("adding container %s state to DB: %w", ctr.ID(), err)
}
if ctrNamespace != nil {
if err := newCtrBkt.Put(namespaceKey, ctrNamespace); err != nil {
- return fmt.Errorf("error adding container %s namespace to DB: %w", ctr.ID(), err)
+ return fmt.Errorf("adding container %s namespace to DB: %w", ctr.ID(), err)
}
}
if pod != nil {
if err := newCtrBkt.Put(podIDKey, []byte(pod.ID())); err != nil {
- return fmt.Errorf("error adding container %s pod to DB: %w", ctr.ID(), err)
+ return fmt.Errorf("adding container %s pod to DB: %w", ctr.ID(), err)
}
}
if netNSPath != "" {
if err := newCtrBkt.Put(netNSKey, []byte(netNSPath)); err != nil {
- return fmt.Errorf("error adding container %s netns path to DB: %w", ctr.ID(), err)
+ return fmt.Errorf("adding container %s netns path to DB: %w", ctr.ID(), err)
}
}
if len(networks) > 0 {
ctrNetworksBkt, err := newCtrBkt.CreateBucket(networksBkt)
if err != nil {
- return fmt.Errorf("error creating networks bucket for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("creating networks bucket for container %s: %w", ctr.ID(), err)
}
for network, opts := range networks {
if err := ctrNetworksBkt.Put([]byte(network), opts); err != nil {
- return fmt.Errorf("error adding network %q to networks bucket for container %s: %w", network, ctr.ID(), err)
+ return fmt.Errorf("adding network %q to networks bucket for container %s: %w", network, ctr.ID(), err)
}
}
}
if _, err := newCtrBkt.CreateBucket(dependenciesBkt); err != nil {
- return fmt.Errorf("error creating dependencies bucket for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("creating dependencies bucket for container %s: %w", ctr.ID(), err)
}
// Add dependencies for the container
@@ -784,14 +784,14 @@ func (s *BoltState) addContainer(ctr *Container, pod *Pod) error {
return fmt.Errorf("container %s does not have a dependencies bucket: %w", dependsCtr, define.ErrInternal)
}
if err := depCtrDependsBkt.Put(ctrID, ctrName); err != nil {
- return fmt.Errorf("error adding ctr %s as dependency of container %s: %w", ctr.ID(), dependsCtr, err)
+ return fmt.Errorf("adding ctr %s as dependency of container %s: %w", ctr.ID(), dependsCtr, err)
}
}
// Add ctr to pod
if pod != nil && podCtrs != nil {
if err := podCtrs.Put(ctrID, ctrName); err != nil {
- return fmt.Errorf("error adding container %s to pod %s: %w", ctr.ID(), pod.ID(), err)
+ return fmt.Errorf("adding container %s to pod %s: %w", ctr.ID(), pod.ID(), err)
}
}
@@ -804,11 +804,11 @@ func (s *BoltState) addContainer(ctr *Container, pod *Pod) error {
ctrDepsBkt, err := volDB.CreateBucketIfNotExists(volDependenciesBkt)
if err != nil {
- return fmt.Errorf("error creating volume %s dependencies bucket to add container %s: %w", vol.Name, ctr.ID(), err)
+ return fmt.Errorf("creating volume %s dependencies bucket to add container %s: %w", vol.Name, ctr.ID(), err)
}
if depExists := ctrDepsBkt.Get(ctrID); depExists == nil {
if err := ctrDepsBkt.Put(ctrID, ctrID); err != nil {
- return fmt.Errorf("error adding container %s to volume %s dependencies: %w", ctr.ID(), vol.Name, err)
+ return fmt.Errorf("adding container %s to volume %s dependencies: %w", ctr.ID(), vol.Name, err)
}
}
}
@@ -902,7 +902,7 @@ func (s *BoltState) removeContainer(ctr *Container, pod *Pod, tx *bolt.Tx) error
return fmt.Errorf("container %s is not in pod %s: %w", ctr.ID(), pod.ID(), define.ErrNoSuchCtr)
}
if err := podCtrs.Delete(ctrID); err != nil {
- return fmt.Errorf("error removing container %s from pod %s: %w", ctr.ID(), pod.ID(), err)
+ return fmt.Errorf("removing container %s from pod %s: %w", ctr.ID(), pod.ID(), err)
}
}
}
@@ -943,21 +943,21 @@ func (s *BoltState) removeContainer(ctr *Container, pod *Pod, tx *bolt.Tx) error
}
if err := ctrBucket.DeleteBucket(ctrID); err != nil {
- return fmt.Errorf("error deleting container %s from DB: %w", ctr.ID(), define.ErrInternal)
+ return fmt.Errorf("deleting container %s from DB: %w", ctr.ID(), define.ErrInternal)
}
if err := idsBucket.Delete(ctrID); err != nil {
- return fmt.Errorf("error deleting container %s ID in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("deleting container %s ID in DB: %w", ctr.ID(), err)
}
if err := namesBucket.Delete(ctrName); err != nil {
- return fmt.Errorf("error deleting container %s name in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("deleting container %s name in DB: %w", ctr.ID(), err)
}
if err := nsBucket.Delete(ctrID); err != nil {
- return fmt.Errorf("error deleting container %s namespace in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("deleting container %s namespace in DB: %w", ctr.ID(), err)
}
if err := allCtrsBucket.Delete(ctrID); err != nil {
- return fmt.Errorf("error deleting container %s from all containers bucket in DB: %w", ctr.ID(), err)
+ return fmt.Errorf("deleting container %s from all containers bucket in DB: %w", ctr.ID(), err)
}
depCtrs := ctr.Dependencies()
@@ -986,7 +986,7 @@ func (s *BoltState) removeContainer(ctr *Container, pod *Pod, tx *bolt.Tx) error
}
if err := depCtrDependsBkt.Delete(ctrID); err != nil {
- return fmt.Errorf("error removing container %s as a dependency of container %s: %w", ctr.ID(), depCtr, err)
+ return fmt.Errorf("removing container %s as a dependency of container %s: %w", ctr.ID(), depCtr, err)
}
}
@@ -1005,7 +1005,7 @@ func (s *BoltState) removeContainer(ctr *Container, pod *Pod, tx *bolt.Tx) error
}
if depExists := ctrDepsBkt.Get(ctrID); depExists == nil {
if err := ctrDepsBkt.Delete(ctrID); err != nil {
- return fmt.Errorf("error deleting container %s dependency on volume %s: %w", ctr.ID(), vol.Name, err)
+ return fmt.Errorf("deleting container %s dependency on volume %s: %w", ctr.ID(), vol.Name, err)
}
}
}
diff --git a/libpod/boltdb_state_linux.go b/libpod/boltdb_state_linux.go
index 813afd8bf..3d9f5b6a2 100644
--- a/libpod/boltdb_state_linux.go
+++ b/libpod/boltdb_state_linux.go
@@ -30,7 +30,7 @@ func replaceNetNS(netNSPath string, ctr *Container, newState *ContainerState) er
newState.NetNS = ns
} else {
if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
- return fmt.Errorf("error joining network namespace of container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("joining network namespace of container %s: %w", ctr.ID(), err)
}
logrus.Errorf("Joining network namespace for container %s: %v", ctr.ID(), err)
diff --git a/libpod/boltdb_state_unsupported.go b/libpod/boltdb_state_unsupported.go
new file mode 100644
index 000000000..9db1e3c4b
--- /dev/null
+++ b/libpod/boltdb_state_unsupported.go
@@ -0,0 +1,19 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "errors"
+)
+
+// replaceNetNS handles network namespace transitions after updating a
+// container's state.
+func replaceNetNS(netNSPath string, ctr *Container, newState *ContainerState) error {
+ return errors.New("replaceNetNS not supported on this platform")
+}
+
+// getNetNSPath retrieves the netns path to be stored in the database
+func getNetNSPath(ctr *Container) string {
+ return ""
+}
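
The new platform files carry both build-constraint syntaxes because this is the standard Go 1.17 transition pattern: the //go:build line is the current form, the // +build line is kept for pre-1.17 toolchains, and gofmt keeps the two in sync. A short sketch of how the constraints partition the symbols (the comments are explanatory, not from the commit):

//go:build !linux && !freebsd
// +build !linux,!freebsd

package libpod

// The _linux.go and _freebsd.go filename suffixes already restrict those
// files to their GOOS, and this file's !linux && !freebsd constraint covers
// every remaining platform, so exactly one definition of replaceNetNS and
// getNetNSPath is compiled into any given build.
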
diff --git a/libpod/common/common.go b/libpod/common/common.go
deleted file mode 100644
index 34cabeadc..000000000
--- a/libpod/common/common.go
+++ /dev/null
@@ -1,16 +0,0 @@
-package common
-
-// IsTrue determines whether the given string equals "true".
-func IsTrue(str string) bool {
- return str == "true"
-}
-
-// IsFalse determines whether the given string equals "false".
-func IsFalse(str string) bool {
- return str == "false"
-}
-
-// IsValidBool determines whether the given string equals "true" or "false".
-func IsValidBool(str string) bool {
- return IsTrue(str) || IsFalse(str)
-}
diff --git a/libpod/common/signing_options.go b/libpod/common/signing_options.go
deleted file mode 100644
index b7e14be82..000000000
--- a/libpod/common/signing_options.go
+++ /dev/null
@@ -1,10 +0,0 @@
-package common
-
-// SigningOptions encapsulates settings that control whether or not we strip or
-// add signatures to images when writing them.
-type SigningOptions struct {
- // RemoveSignatures directs us to remove any signatures which are already present.
- RemoveSignatures bool
- // SignBy is a key identifier of some kind, indicating that a signature should be generated using the specified private key and stored with the image.
- SignBy string
-}
diff --git a/libpod/container.go b/libpod/container.go
index 4e2d93860..cfffd8ea1 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -124,10 +124,6 @@ type Container struct {
// This is true if a container is restored from a checkpoint.
restoreFromCheckpoint bool
- // Used to query the NOTIFY_SOCKET once along with setting up
- // mounts etc.
- notifySocket string
-
slirp4netnsSubnet *net.IPNet
}
@@ -241,6 +237,9 @@ type ContainerNamedVolume struct {
Dest string `json:"dest"`
// Options are fstab style mount options
Options []string `json:"options,omitempty"`
+ // IsAnonymous sets the named volume as anonymous even if it has a name
+ // This is used for emptyDir volumes from a kube yaml
+ IsAnonymous bool `json:"setAnonymous,omitempty"`
}
// ContainerOverlayVolume is a overlay volume that will be mounted into the
@@ -354,14 +353,16 @@ func (c *Container) specFromState() (*spec.Spec, error) {
returnSpec = new(spec.Spec)
content, err := ioutil.ReadAll(f)
if err != nil {
- return nil, fmt.Errorf("error reading container config: %w", err)
+ return nil, fmt.Errorf("reading container config: %w", err)
}
if err := json.Unmarshal(content, &returnSpec); err != nil {
- return nil, fmt.Errorf("error unmarshalling container config: %w", err)
+ // Malformed spec, just use c.config.Spec instead
+ logrus.Warnf("Error unmarshalling container %s config: %v", c.ID(), err)
+ return c.config.Spec, nil
}
} else if !os.IsNotExist(err) {
// ignore when the file does not exist
- return nil, fmt.Errorf("error opening container config: %w", err)
+ return nil, fmt.Errorf("opening container config: %w", err)
}
return returnSpec, nil
@@ -704,7 +705,7 @@ func (c *Container) Mounted() (bool, string, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return false, "", fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return false, "", fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
// We cannot directly return c.state.Mountpoint as it is not guaranteed
@@ -734,7 +735,7 @@ func (c *Container) StartedTime() (time.Time, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return time.Time{}, fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return time.Time{}, fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
return c.state.StartedTime, nil
@@ -746,7 +747,7 @@ func (c *Container) FinishedTime() (time.Time, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return time.Time{}, fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return time.Time{}, fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
return c.state.FinishedTime, nil
@@ -761,7 +762,7 @@ func (c *Container) ExitCode() (int32, bool, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return 0, false, fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return 0, false, fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
return c.state.ExitCode, c.state.Exited, nil
@@ -773,7 +774,7 @@ func (c *Container) OOMKilled() (bool, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return false, fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return false, fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
return c.state.OOMKilled, nil
@@ -860,7 +861,7 @@ func (c *Container) ExecSession(id string) (*ExecSession, error) {
returnSession := new(ExecSession)
if err := JSONDeepCopy(session, returnSession); err != nil {
- return nil, fmt.Errorf("error copying contents of container %s exec session %s: %w", c.ID(), session.ID(), err)
+ return nil, fmt.Errorf("copying contents of container %s exec session %s: %w", c.ID(), session.ID(), err)
}
return returnSession, nil
@@ -920,7 +921,7 @@ func (c *Container) NamespacePath(linuxNS LinuxNS) (string, error) { //nolint:in
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return "", fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return "", fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
@@ -958,7 +959,7 @@ func (c *Container) CgroupPath() (string, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return "", fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return "", fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
return c.cGroupPath()
@@ -1058,7 +1059,7 @@ func (c *Container) RootFsSize() (int64, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return -1, fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return -1, fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
return c.rootFsSize()
@@ -1070,7 +1071,7 @@ func (c *Container) RWSize() (int64, error) {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
- return -1, fmt.Errorf("error updating container %s state: %w", c.ID(), err)
+ return -1, fmt.Errorf("updating container %s state: %w", c.ID(), err)
}
}
return c.rwSize()
@@ -1134,20 +1135,6 @@ func (c *Container) NetworkDisabled() (bool, error) {
return networkDisabled(c)
}
-func networkDisabled(c *Container) (bool, error) {
- if c.config.CreateNetNS {
- return false, nil
- }
- if !c.config.PostConfigureNetNS {
- for _, ns := range c.config.Spec.Linux.Namespaces {
- if ns.Type == spec.NetworkNamespace {
- return ns.Path == "", nil
- }
- }
- }
- return false, nil
-}
-
func (c *Container) HostNetwork() bool {
if c.config.CreateNetNS || c.config.NetNsCtr != "" {
return false
@@ -1172,7 +1159,7 @@ func (c *Container) ContainerState() (*ContainerState, error) {
}
returnConfig := new(ContainerState)
if err := JSONDeepCopy(c.state, returnConfig); err != nil {
- return nil, fmt.Errorf("error copying container %s state: %w", c.ID(), err)
+ return nil, fmt.Errorf("copying container %s state: %w", c.ID(), err)
}
return c.state, nil
}
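
The specFromState hunk above turns a hard failure into graceful degradation: a malformed on-disk spec now logs a warning and falls back to the in-memory c.config.Spec. A minimal sketch of that pattern under hypothetical names (loadSpec is not a function in this commit):

package libpod

import (
	"encoding/json"
	"os"

	spec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
)

// loadSpec reads a cached OCI spec and degrades to the supplied fallback
// instead of propagating read or parse errors to the caller.
func loadSpec(path string, fallback *spec.Spec) *spec.Spec {
	content, err := os.ReadFile(path)
	if err != nil {
		return fallback // no cached spec: use the in-memory config
	}
	s := new(spec.Spec)
	if err := json.Unmarshal(content, s); err != nil {
		logrus.Warnf("Error unmarshalling spec at %s: %v", path, err)
		return fallback // malformed spec: warn and degrade
	}
	return s
}
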
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 2ff4bfe08..dd47b4d12 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -16,6 +16,7 @@ import (
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/signal"
"github.com/containers/storage/pkg/archive"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
@@ -98,6 +99,15 @@ func (c *Container) Start(ctx context.Context, recursive bool) error {
return c.start()
}
+// Update updates the given container.
+// Only the cgroup config can be updated, and therefore only a Linux resource spec is passed.
+func (c *Container) Update(res *spec.LinuxResources) error {
+ if err := c.syncContainer(); err != nil {
+ return err
+ }
+ return c.update(res)
+}
+
// StartAndAttach starts a container and attaches to it.
// This acts as a combination of the Start and Attach APIs, ensuring proper
// ordering of the two such that no output from the container is lost (e.g. the
@@ -674,7 +684,7 @@ func (c *Container) Cleanup(ctx context.Context) error {
// When the container has already been removed, the OCI runtime directory remain.
if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
if err := c.cleanupRuntime(ctx); err != nil {
- return fmt.Errorf("error cleaning up container %s from OCI runtime: %w", c.ID(), err)
+ return fmt.Errorf("cleaning up container %s from OCI runtime: %w", c.ID(), err)
}
return nil
}
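
The new Container.Update method accepts only cgroup resource limits, expressed as an OCI LinuxResources spec. A hedged usage sketch (the helper name and the idea of changing just the memory ceiling are illustrative, not part of this commit):

package libpod

import (
	spec "github.com/opencontainers/runtime-spec/specs-go"
)

// updateMemoryLimit adjusts only the container's memory limit, leaving the
// rest of the cgroup configuration untouched.
func updateMemoryLimit(ctr *Container, limitBytes int64) error {
	res := &spec.LinuxResources{
		Memory: &spec.LinuxMemory{Limit: &limitBytes},
	}
	return ctr.Update(res)
}
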
diff --git a/libpod/container_commit.go b/libpod/container_commit.go
index c93c9c7bb..f447816e3 100644
--- a/libpod/container_commit.go
+++ b/libpod/container_commit.go
@@ -48,7 +48,7 @@ func (c *Container) Commit(ctx context.Context, destImage string, options Contai
if c.state.State == define.ContainerStateRunning && options.Pause {
if err := c.pause(); err != nil {
- return nil, fmt.Errorf("error pausing container %q to commit: %w", c.ID(), err)
+ return nil, fmt.Errorf("pausing container %q to commit: %w", c.ID(), err)
}
defer func() {
if err := c.unpause(); err != nil {
@@ -202,7 +202,7 @@ func (c *Container) Commit(ctx context.Context, destImage string, options Contai
imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, resolvedImageName)
if err != nil {
- return nil, fmt.Errorf("error parsing target image name %q: %w", destImage, err)
+ return nil, fmt.Errorf("parsing target image name %q: %w", destImage, err)
}
commitRef = imageRef
}
diff --git a/libpod/container_config.go b/libpod/container_config.go
index 544c45a8c..f3585d22c 100644
--- a/libpod/container_config.go
+++ b/libpod/container_config.go
@@ -7,6 +7,7 @@ import (
"github.com/containers/common/libnetwork/types"
"github.com/containers/common/pkg/secrets"
"github.com/containers/image/v5/manifest"
+ "github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/namespaces"
"github.com/containers/podman/v4/pkg/specgen"
"github.com/containers/storage"
@@ -386,10 +387,14 @@ type ContainerMiscConfig struct {
IsService bool `json:"isService"`
// SdNotifyMode tells libpod what to do with a NOTIFY_SOCKET if passed
SdNotifyMode string `json:"sdnotifyMode,omitempty"`
+ // SdNotifySocket stores NOTIFY_SOCKET in use by the container
+ SdNotifySocket string `json:"sdnotifySocket,omitempty"`
// Systemd tells libpod to set up the container in systemd mode, a value of nil denotes false
Systemd *bool `json:"systemd,omitempty"`
// HealthCheckConfig has the health check command and related timings
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
+ // HealthCheckOnFailureAction defines an action to take once the container turns unhealthy.
+ HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"`
// PreserveFDs is a number of additional file descriptors (in addition
// to 0, 1, 2) that will be passed to the executed process. The total FDs
// passed will be 3 + PreserveFDs.
diff --git a/libpod/container_copy_unsupported.go b/libpod/container_copy_unsupported.go
new file mode 100644
index 000000000..62937279a
--- /dev/null
+++ b/libpod/container_copy_unsupported.go
@@ -0,0 +1,17 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+ "io"
+)
+
+func (c *Container) copyFromArchive(path string, chown, noOverwriteDirNonDir bool, rename map[string]string, reader io.Reader) (func() error, error) {
+ return nil, errors.New("not implemented (*Container) copyFromArchive")
+}
+
+func (c *Container) copyToArchive(path string, writer io.Writer) (func() error, error) {
+ return nil, errors.New("not implemented (*Container) copyToArchive")
+}
diff --git a/libpod/container_exec.go b/libpod/container_exec.go
index d3c80e896..3a2cba52f 100644
--- a/libpod/container_exec.go
+++ b/libpod/container_exec.go
@@ -182,7 +182,7 @@ func (c *Container) ExecCreate(config *ExecConfig) (string, error) {
}
// Generate an ID for our new exec session
- sessionID := stringid.GenerateNonCryptoID()
+ sessionID := stringid.GenerateRandomID()
found := true
// This really ought to be a do-while, but Go doesn't have those...
for found {
@@ -194,7 +194,7 @@ func (c *Container) ExecCreate(config *ExecConfig) (string, error) {
}
}
if found {
- sessionID = stringid.GenerateNonCryptoID()
+ sessionID = stringid.GenerateRandomID()
}
}
@@ -205,7 +205,7 @@ func (c *Container) ExecCreate(config *ExecConfig) (string, error) {
session.State = define.ExecStateCreated
session.Config = new(ExecConfig)
if err := JSONDeepCopy(config, session.Config); err != nil {
- return "", fmt.Errorf("error copying exec configuration into exec session: %w", err)
+ return "", fmt.Errorf("copying exec configuration into exec session: %w", err)
}
if len(session.Config.ExitCommand) > 0 {
@@ -372,7 +372,7 @@ func (c *Container) execStartAndAttach(sessionID string, streams *define.AttachS
if lastErr != nil {
logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
}
- return fmt.Errorf("error syncing container %s state to update exec session %s: %w", c.ID(), sessionID, err)
+ return fmt.Errorf("syncing container %s state to update exec session %s: %w", c.ID(), sessionID, err)
}
// Now handle the error from readExecExitCode above.
@@ -809,7 +809,7 @@ func (c *Container) exec(config *ExecConfig, streams *define.AttachStreams, resi
// streaming.
diedEvent, err := c.runtime.GetExecDiedEvent(context.Background(), c.ID(), sessionID)
if err != nil {
- return -1, fmt.Errorf("error retrieving exec session %s exit code: %w", sessionID, err)
+ return -1, fmt.Errorf("retrieving exec session %s exit code: %w", sessionID, err)
}
return diedEvent.ContainerExitCode, nil
}
@@ -911,7 +911,7 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) {
if err := os.MkdirAll(c.execExitFileDir(sessionID), execDirPermission); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
- return fmt.Errorf("error creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err)
+ return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err)
}
}
return nil
@@ -1121,7 +1121,7 @@ func writeExecExitCode(c *Container, sessionID string, exitCode int) error {
// need to. Exit without error.
return nil
}
- return fmt.Errorf("error syncing container %s state to remove exec session %s: %w", c.ID(), sessionID, err)
+ return fmt.Errorf("syncing container %s state to remove exec session %s: %w", c.ID(), sessionID, err)
}
return justWriteExecExitCode(c, sessionID, exitCode)
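As the do-while comment above notes, the session-ID collision loop has to seed the ID before checking it. A stripped-down sketch of the same shape, with a hypothetical exists() standing in for the state lookup in the real code:

    sessionID := stringid.GenerateRandomID()
    for exists(sessionID) { // hypothetical check against stored exec sessions
        sessionID = stringid.GenerateRandomID()
    }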
diff --git a/libpod/container_freebsd.go b/libpod/container_freebsd.go
new file mode 100644
index 000000000..87fb494dd
--- /dev/null
+++ b/libpod/container_freebsd.go
@@ -0,0 +1,31 @@
+//go:build freebsd
+// +build freebsd
+
+package libpod
+
+type containerPlatformState struct {
+ // NetNS is the name of the container's network VNET
+ // jail. Will only be set if config.CreateNetNS is true, or
+ // the container was told to join another container's network
+ // namespace.
+ NetNS *jailNetNS `json:"-"`
+}
+
+type jailNetNS struct {
+ Name string `json:"-"`
+}
+
+func (ns *jailNetNS) Path() string {
+ // The jail name approximately corresponds to the Linux netns path
+ return ns.Name
+}
+
+func networkDisabled(c *Container) (bool, error) {
+ if c.config.CreateNetNS {
+ return false, nil
+ }
+ if !c.config.PostConfigureNetNS {
+ return c.state.NetNS != nil, nil
+ }
+ return false, nil
+}
diff --git a/libpod/container_graph.go b/libpod/container_graph.go
index 67b1abc34..d43579e4a 100644
--- a/libpod/container_graph.go
+++ b/libpod/container_graph.go
@@ -160,7 +160,7 @@ func detectCycles(graph *ContainerGraph) (bool, error) {
// Popped item is no longer on the stack, mark as such
topInfo, ok := nodes[topOfStack.id]
if !ok {
- return false, fmt.Errorf("error finding node info for %s: %w", topOfStack.id, define.ErrInternal)
+ return false, fmt.Errorf("finding node info for %s: %w", topOfStack.id, define.ErrInternal)
}
topInfo.onStack = false
@@ -281,3 +281,94 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError
startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart)
}
}
+
+// Visit a node on the container graph and remove it, or set an error if
+// removal fails. Only intended for use in pod removal; do *not* use when
+// removing individual containers.
+// All containers are assumed to be *UNLOCKED* when this function runs.
+// Container locks will be acquired as necessary.
+// The pod is optional; if one is given, it must be *LOCKED*.
+func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool, timeout *uint, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, ctrNamedVolumes map[string]*ContainerNamedVolume) {
+ // If we already visited this node, we're done.
+ if ctrsVisited[node.id] {
+ return
+ }
+
+ // Someone who depends on us failed.
+ // Mark us as failed and recurse.
+ if setError {
+ ctrsVisited[node.id] = true
+ ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s could not be removed: %w", node.id, define.ErrCtrStateInvalid)
+
+ // Hit everything we depend on and set errors there as well; none of it can be removed while we remain.
+ for _, successor := range node.dependsOn {
+ removeNode(ctx, successor, pod, force, timeout, true, ctrErrors, ctrsVisited, ctrNamedVolumes)
+ }
+ }
+
+ // Does anyone still depend on us?
+ // We cannot be removed if true. Once everything that depends on us
+ // has been removed, we can be removed.
+ for _, dep := range node.dependedOn {
+ // A container that depends on us hasn't been removed yet.
+ // Bail out; we will be revisited once it has been removed.
+ if ok := ctrsVisited[dep.id]; !ok {
+ return
+ }
+ }
+
+ // Going to try to remove the node, mark us as visited
+ ctrsVisited[node.id] = true
+
+ ctrErrored := false
+
+ // Verify that all containers that depend on us are gone.
+ // Graph traversal should guarantee this is true, but this isn't that
+ // expensive, and it's better to be safe.
+ for _, dep := range node.dependedOn {
+ if _, err := node.container.runtime.GetContainer(dep.id); err == nil {
+ ctrErrored = true
+ ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s still exists: %w", node.id, define.ErrDepExists)
+ }
+ }
+
+ // Lock the container
+ node.container.lock.Lock()
+
+ // Gate all subsequent bits behind a ctrErrored check - we don't want to
+ // proceed if a previous step failed.
+ if !ctrErrored {
+ if err := node.container.syncContainer(); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = err
+ }
+ }
+
+ if !ctrErrored {
+ for _, vol := range node.container.config.NamedVolumes {
+ ctrNamedVolumes[vol.Name] = vol
+ }
+
+ if pod != nil && pod.state.InfraContainerID == node.id {
+ pod.state.InfraContainerID = ""
+ if err := pod.save(); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = fmt.Errorf("error removing infra container %s from pod %s: %w", node.id, pod.ID(), err)
+ }
+ }
+ }
+
+ if !ctrErrored {
+ if err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, timeout); err != nil {
+ ctrErrored = true
+ ctrErrors[node.id] = err
+ }
+ }
+
+ node.container.lock.Unlock()
+
+ // Recurse to anyone who we depend on and remove them
+ for _, successor := range node.dependsOn {
+ removeNode(ctx, successor, pod, force, timeout, ctrErrored, ctrErrors, ctrsVisited, ctrNamedVolumes)
+ }
+}
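The removeNode traversal added above is effectively a reverse-topological walk: a node may only be removed once every container depending on it has been visited, and a failure poisons everything the failed node depends on. A stripped-down sketch of that pattern, with hypothetical node/remove types in place of containerNode and the runtime:

    func removeAll(n *node, visited map[string]bool, errs map[string]error) {
        if visited[n.id] {
            return
        }
        // Wait until every dependent has been handled; we will be
        // revisited by the last of them.
        for _, dep := range n.dependedOn {
            if !visited[dep.id] {
                return
            }
        }
        visited[n.id] = true
        if err := remove(n); err != nil { // hypothetical removal step
            errs[n.id] = err
        }
        // Now nothing depends on our dependencies through us; recurse.
        for _, succ := range n.dependsOn {
            removeAll(succ, visited, errs)
        }
    }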
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index fa2130a28..e4089efa6 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -3,20 +3,15 @@ package libpod
import (
"errors"
"fmt"
- "sort"
"strings"
- "github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/driver"
"github.com/containers/podman/v4/pkg/util"
"github.com/containers/storage/types"
units "github.com/docker/go-units"
spec "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/opencontainers/runtime-tools/generate"
- "github.com/opencontainers/runtime-tools/validate"
"github.com/sirupsen/logrus"
- "github.com/syndtr/gocapability/capability"
)
// inspectLocked inspects a container for low-level information.
@@ -24,15 +19,15 @@ import (
func (c *Container) inspectLocked(size bool) (*define.InspectContainerData, error) {
storeCtr, err := c.runtime.store.Container(c.ID())
if err != nil {
- return nil, fmt.Errorf("error getting container from store %q: %w", c.ID(), err)
+ return nil, fmt.Errorf("getting container from store %q: %w", c.ID(), err)
}
layer, err := c.runtime.store.Layer(storeCtr.LayerID)
if err != nil {
- return nil, fmt.Errorf("error reading information about layer %q: %w", storeCtr.LayerID, err)
+ return nil, fmt.Errorf("reading information about layer %q: %w", storeCtr.LayerID, err)
}
driverData, err := driver.GetDriverData(c.runtime.store, layer.ID)
if err != nil {
- return nil, fmt.Errorf("error getting graph driver info %q: %w", c.ID(), err)
+ return nil, fmt.Errorf("getting graph driver info %q: %w", c.ID(), err)
}
return c.getContainerInspectData(size, driverData)
}
@@ -163,8 +158,6 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
Driver: driverData.Name,
MountLabel: config.MountLabel,
ProcessLabel: config.ProcessLabel,
- EffectiveCaps: ctrSpec.Process.Capabilities.Effective,
- BoundingCaps: ctrSpec.Process.Capabilities.Bounding,
AppArmorProfile: ctrSpec.Process.ApparmorProfile,
ExecIDs: execIDs,
GraphDriver: driverData,
@@ -173,6 +166,10 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
IsInfra: c.IsInfra(),
IsService: c.IsService(),
}
+ if ctrSpec.Process.Capabilities != nil {
+ data.EffectiveCaps = ctrSpec.Process.Capabilities.Effective
+ data.BoundingCaps = ctrSpec.Process.Capabilities.Bounding
+ }
if c.state.ConfigPath != "" {
data.OCIConfigPath = c.state.ConfigPath
@@ -241,7 +238,7 @@ func (c *Container) GetMounts(namedVolumes []*ContainerNamedVolume, imageVolumes
// volume.
volFromDB, err := c.runtime.state.Volume(volume.Name)
if err != nil {
- return nil, fmt.Errorf("error looking up volume %s in container %s config: %w", volume.Name, c.ID(), err)
+ return nil, fmt.Errorf("looking up volume %s in container %s config: %w", volume.Name, c.ID(), err)
}
mountStruct.Driver = volFromDB.Driver()
@@ -390,6 +387,8 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) *define.Insp
// leak.
ctrConfig.Healthcheck = c.config.HealthCheckConfig
+ ctrConfig.HealthcheckOnFailureAction = c.config.HealthCheckOnFailureAction.String()
+
ctrConfig.CreateCommand = c.config.CreateCommand
ctrConfig.Timezone = c.config.Timezone
@@ -414,6 +413,8 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) *define.Insp
ctrConfig.Passwd = c.config.Passwd
ctrConfig.ChrootDirs = append(ctrConfig.ChrootDirs, c.config.ChrootDirs...)
+ ctrConfig.SdNotifyMode = c.config.SdNotifyMode
+ ctrConfig.SdNotifySocket = c.config.SdNotifySocket
return ctrConfig
}
@@ -480,11 +481,6 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
hostConfig.ShmSize = c.config.ShmSize
hostConfig.Runtime = "oci"
- // This is very expensive to initialize.
- // So we don't want to initialize it unless we absolutely have to - IE,
- // there are things that require a major:minor to path translation.
- var deviceNodes map[string]string
-
// Annotations
if ctrSpec.Annotations != nil {
hostConfig.ContainerIDFile = ctrSpec.Annotations[define.InspectAnnotationCIDFile]
@@ -502,109 +498,8 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
}
}
- // Resource limits
- if ctrSpec.Linux != nil {
- if ctrSpec.Linux.Resources != nil {
- if ctrSpec.Linux.Resources.CPU != nil {
- if ctrSpec.Linux.Resources.CPU.Shares != nil {
- hostConfig.CpuShares = *ctrSpec.Linux.Resources.CPU.Shares
- }
- if ctrSpec.Linux.Resources.CPU.Period != nil {
- hostConfig.CpuPeriod = *ctrSpec.Linux.Resources.CPU.Period
- }
- if ctrSpec.Linux.Resources.CPU.Quota != nil {
- hostConfig.CpuQuota = *ctrSpec.Linux.Resources.CPU.Quota
- }
- if ctrSpec.Linux.Resources.CPU.RealtimePeriod != nil {
- hostConfig.CpuRealtimePeriod = *ctrSpec.Linux.Resources.CPU.RealtimePeriod
- }
- if ctrSpec.Linux.Resources.CPU.RealtimeRuntime != nil {
- hostConfig.CpuRealtimeRuntime = *ctrSpec.Linux.Resources.CPU.RealtimeRuntime
- }
- hostConfig.CpusetCpus = ctrSpec.Linux.Resources.CPU.Cpus
- hostConfig.CpusetMems = ctrSpec.Linux.Resources.CPU.Mems
- }
- if ctrSpec.Linux.Resources.Memory != nil {
- if ctrSpec.Linux.Resources.Memory.Limit != nil {
- hostConfig.Memory = *ctrSpec.Linux.Resources.Memory.Limit
- }
- if ctrSpec.Linux.Resources.Memory.Reservation != nil {
- hostConfig.MemoryReservation = *ctrSpec.Linux.Resources.Memory.Reservation
- }
- if ctrSpec.Linux.Resources.Memory.Swap != nil {
- hostConfig.MemorySwap = *ctrSpec.Linux.Resources.Memory.Swap
- }
- if ctrSpec.Linux.Resources.Memory.Swappiness != nil {
- hostConfig.MemorySwappiness = int64(*ctrSpec.Linux.Resources.Memory.Swappiness)
- } else {
- // Swappiness has a default of -1
- hostConfig.MemorySwappiness = -1
- }
- if ctrSpec.Linux.Resources.Memory.DisableOOMKiller != nil {
- hostConfig.OomKillDisable = *ctrSpec.Linux.Resources.Memory.DisableOOMKiller
- }
- }
- if ctrSpec.Linux.Resources.Pids != nil {
- hostConfig.PidsLimit = ctrSpec.Linux.Resources.Pids.Limit
- }
- hostConfig.CgroupConf = ctrSpec.Linux.Resources.Unified
- if ctrSpec.Linux.Resources.BlockIO != nil {
- if ctrSpec.Linux.Resources.BlockIO.Weight != nil {
- hostConfig.BlkioWeight = *ctrSpec.Linux.Resources.BlockIO.Weight
- }
- hostConfig.BlkioWeightDevice = []define.InspectBlkioWeightDevice{}
- for _, dev := range ctrSpec.Linux.Resources.BlockIO.WeightDevice {
- key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor)
- // TODO: how do we handle LeafWeight vs
- // Weight? For now, ignore anything
- // without Weight set.
- if dev.Weight == nil {
- logrus.Infof("Ignoring weight device %s as it lacks a weight", key)
- continue
- }
- if deviceNodes == nil {
- nodes, err := util.FindDeviceNodes()
- if err != nil {
- return nil, err
- }
- deviceNodes = nodes
- }
- path, ok := deviceNodes[key]
- if !ok {
- logrus.Infof("Could not locate weight device %s in system devices", key)
- continue
- }
- weightDev := define.InspectBlkioWeightDevice{}
- weightDev.Path = path
- weightDev.Weight = *dev.Weight
- hostConfig.BlkioWeightDevice = append(hostConfig.BlkioWeightDevice, weightDev)
- }
-
- readBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadBpsDevice)
- if err != nil {
- return nil, err
- }
- hostConfig.BlkioDeviceReadBps = readBps
-
- writeBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteBpsDevice)
- if err != nil {
- return nil, err
- }
- hostConfig.BlkioDeviceWriteBps = writeBps
-
- readIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadIOPSDevice)
- if err != nil {
- return nil, err
- }
- hostConfig.BlkioDeviceReadIOps = readIops
-
- writeIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteIOPSDevice)
- if err != nil {
- return nil, err
- }
- hostConfig.BlkioDeviceWriteIOps = writeIops
- }
- }
+ if err := c.platformInspectContainerHostConfig(ctrSpec, hostConfig); err != nil {
+ return nil, err
}
// NanoCPUs.
@@ -655,182 +550,6 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
hostConfig.PortBindings = make(map[string][]define.InspectHostPort)
}
- // Cap add and cap drop.
- // We need a default set of capabilities to compare against.
- // The OCI generate package has one, and is commonly used, so we'll
- // use it.
- // Problem: there are 5 sets of capabilities.
- // Use the bounding set for this computation, it's the most encompassing
- // (but still not perfect).
- capAdd := []string{}
- capDrop := []string{}
- // No point in continuing if we got a spec without a Process block...
- if ctrSpec.Process != nil {
- // Max an O(1) lookup table for default bounding caps.
- boundingCaps := make(map[string]bool)
- g, err := generate.New("linux")
- if err != nil {
- return nil, err
- }
- if !hostConfig.Privileged {
- for _, cap := range g.Config.Process.Capabilities.Bounding {
- boundingCaps[cap] = true
- }
- } else {
- // If we are privileged, use all caps.
- for _, cap := range capability.List() {
- if g.HostSpecific && cap > validate.LastCap() {
- continue
- }
- boundingCaps[fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))] = true
- }
- }
- // Iterate through spec caps.
- // If it's not in default bounding caps, it was added.
- // If it is, delete from the default set. Whatever remains after
- // we finish are the dropped caps.
- for _, cap := range ctrSpec.Process.Capabilities.Bounding {
- if _, ok := boundingCaps[cap]; ok {
- delete(boundingCaps, cap)
- } else {
- capAdd = append(capAdd, cap)
- }
- }
- for cap := range boundingCaps {
- capDrop = append(capDrop, cap)
- }
- // Sort CapDrop so it displays in consistent order (GH #9490)
- sort.Strings(capDrop)
- }
- hostConfig.CapAdd = capAdd
- hostConfig.CapDrop = capDrop
- switch {
- case c.config.IPCNsCtr != "":
- hostConfig.IpcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr)
- case ctrSpec.Linux != nil:
- // Locate the spec's IPC namespace.
- // If there is none, it's ipc=host.
- // If there is one and it has a path, it's "ns:".
- // If no path, it's default - the empty string.
- for _, ns := range ctrSpec.Linux.Namespaces {
- if ns.Type == spec.IPCNamespace {
- if ns.Path != "" {
- hostConfig.IpcMode = fmt.Sprintf("ns:%s", ns.Path)
- } else {
- break
- }
- }
- }
- case c.config.NoShm:
- hostConfig.IpcMode = "none"
- case c.config.NoShmShare:
- hostConfig.IpcMode = "private"
- }
- if hostConfig.IpcMode == "" {
- hostConfig.IpcMode = "shareable"
- }
-
- // Cgroup namespace mode
- cgroupMode := ""
- if c.config.CgroupNsCtr != "" {
- cgroupMode = fmt.Sprintf("container:%s", c.config.CgroupNsCtr)
- } else if ctrSpec.Linux != nil {
- // Locate the spec's cgroup namespace
- // If there is none, it's cgroup=host.
- // If there is one and it has a path, it's "ns:".
- // If there is no path, it's private.
- for _, ns := range ctrSpec.Linux.Namespaces {
- if ns.Type == spec.CgroupNamespace {
- if ns.Path != "" {
- cgroupMode = fmt.Sprintf("ns:%s", ns.Path)
- } else {
- cgroupMode = "private"
- }
- }
- }
- if cgroupMode == "" {
- cgroupMode = "host"
- }
- }
- hostConfig.CgroupMode = cgroupMode
-
- // Cgroup parent
- // Need to check if it's the default, and not print if so.
- defaultCgroupParent := ""
- switch c.CgroupManager() {
- case config.CgroupfsCgroupsManager:
- defaultCgroupParent = CgroupfsDefaultCgroupParent
- case config.SystemdCgroupsManager:
- defaultCgroupParent = SystemdDefaultCgroupParent
- }
- if c.config.CgroupParent != defaultCgroupParent {
- hostConfig.CgroupParent = c.config.CgroupParent
- }
- hostConfig.CgroupManager = c.CgroupManager()
-
- // PID namespace mode
- pidMode := ""
- if c.config.PIDNsCtr != "" {
- pidMode = fmt.Sprintf("container:%s", c.config.PIDNsCtr)
- } else if ctrSpec.Linux != nil {
- // Locate the spec's PID namespace.
- // If there is none, it's pid=host.
- // If there is one and it has a path, it's "ns:".
- // If there is no path, it's default - the empty string.
- for _, ns := range ctrSpec.Linux.Namespaces {
- if ns.Type == spec.PIDNamespace {
- if ns.Path != "" {
- pidMode = fmt.Sprintf("ns:%s", ns.Path)
- } else {
- pidMode = "private"
- }
- break
- }
- }
- if pidMode == "" {
- pidMode = "host"
- }
- }
- hostConfig.PidMode = pidMode
-
- // UTS namespace mode
- utsMode := c.NamespaceMode(spec.UTSNamespace, ctrSpec)
-
- hostConfig.UTSMode = utsMode
-
- // User namespace mode
- usernsMode := ""
- if c.config.UserNsCtr != "" {
- usernsMode = fmt.Sprintf("container:%s", c.config.UserNsCtr)
- } else if ctrSpec.Linux != nil {
- // Locate the spec's user namespace.
- // If there is none, it's default - the empty string.
- // If there is one, it's "private" if no path, or "ns:" if
- // there's a path.
-
- for _, ns := range ctrSpec.Linux.Namespaces {
- if ns.Type == spec.UserNamespace {
- if ns.Path != "" {
- usernsMode = fmt.Sprintf("ns:%s", ns.Path)
- } else {
- usernsMode = "private"
- }
- }
- }
- }
- hostConfig.UsernsMode = usernsMode
- if c.config.IDMappings.UIDMap != nil && c.config.IDMappings.GIDMap != nil {
- hostConfig.IDMappings = generateIDMappings(c.config.IDMappings)
- }
- // Devices
- // Do not include if privileged - assumed that all devices will be
- // included.
- var err error
- hostConfig.Devices, err = c.GetDevices(hostConfig.Privileged, *ctrSpec, deviceNodes)
- if err != nil {
- return nil, err
- }
-
// Ulimits
hostConfig.Ulimits = []define.InspectUlimit{}
if ctrSpec.Process != nil {
diff --git a/libpod/container_inspect_freebsd.go b/libpod/container_inspect_freebsd.go
new file mode 100644
index 000000000..8b4e8df87
--- /dev/null
+++ b/libpod/container_inspect_freebsd.go
@@ -0,0 +1,17 @@
+package libpod
+
+import (
+ "github.com/containers/podman/v4/libpod/define"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func (c *Container) platformInspectContainerHostConfig(ctrSpec *spec.Spec, hostConfig *define.InspectContainerHostConfig) error {
+ // Not sure what to put here. FreeBSD jails use pids from the
+ // global pool but can only see their own pids.
+ hostConfig.PidMode = "host"
+
+ // UTS namespace mode
+ hostConfig.UTSMode = c.NamespaceMode(spec.UTSNamespace, ctrSpec)
+
+ return nil
+}
diff --git a/libpod/container_inspect_linux.go b/libpod/container_inspect_linux.go
new file mode 100644
index 000000000..355690d70
--- /dev/null
+++ b/libpod/container_inspect_linux.go
@@ -0,0 +1,306 @@
+package libpod
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/util"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/runtime-tools/validate"
+ "github.com/sirupsen/logrus"
+ "github.com/syndtr/gocapability/capability"
+)
+
+func (c *Container) platformInspectContainerHostConfig(ctrSpec *spec.Spec, hostConfig *define.InspectContainerHostConfig) error {
+ // This is very expensive to initialize.
+ // So we don't want to initialize it unless we absolutely have to - i.e.,
+ // there are things that require a major:minor to path translation.
+ var deviceNodes map[string]string
+
+ // Resource limits
+ if ctrSpec.Linux != nil {
+ if ctrSpec.Linux.Resources != nil {
+ if ctrSpec.Linux.Resources.CPU != nil {
+ if ctrSpec.Linux.Resources.CPU.Shares != nil {
+ hostConfig.CpuShares = *ctrSpec.Linux.Resources.CPU.Shares
+ }
+ if ctrSpec.Linux.Resources.CPU.Period != nil {
+ hostConfig.CpuPeriod = *ctrSpec.Linux.Resources.CPU.Period
+ }
+ if ctrSpec.Linux.Resources.CPU.Quota != nil {
+ hostConfig.CpuQuota = *ctrSpec.Linux.Resources.CPU.Quota
+ }
+ if ctrSpec.Linux.Resources.CPU.RealtimePeriod != nil {
+ hostConfig.CpuRealtimePeriod = *ctrSpec.Linux.Resources.CPU.RealtimePeriod
+ }
+ if ctrSpec.Linux.Resources.CPU.RealtimeRuntime != nil {
+ hostConfig.CpuRealtimeRuntime = *ctrSpec.Linux.Resources.CPU.RealtimeRuntime
+ }
+ hostConfig.CpusetCpus = ctrSpec.Linux.Resources.CPU.Cpus
+ hostConfig.CpusetMems = ctrSpec.Linux.Resources.CPU.Mems
+ }
+ if ctrSpec.Linux.Resources.Memory != nil {
+ if ctrSpec.Linux.Resources.Memory.Limit != nil {
+ hostConfig.Memory = *ctrSpec.Linux.Resources.Memory.Limit
+ }
+ if ctrSpec.Linux.Resources.Memory.Reservation != nil {
+ hostConfig.MemoryReservation = *ctrSpec.Linux.Resources.Memory.Reservation
+ }
+ if ctrSpec.Linux.Resources.Memory.Swap != nil {
+ hostConfig.MemorySwap = *ctrSpec.Linux.Resources.Memory.Swap
+ }
+ if ctrSpec.Linux.Resources.Memory.Swappiness != nil {
+ hostConfig.MemorySwappiness = int64(*ctrSpec.Linux.Resources.Memory.Swappiness)
+ } else {
+ // Swappiness has a default of -1
+ hostConfig.MemorySwappiness = -1
+ }
+ if ctrSpec.Linux.Resources.Memory.DisableOOMKiller != nil {
+ hostConfig.OomKillDisable = *ctrSpec.Linux.Resources.Memory.DisableOOMKiller
+ }
+ }
+ if ctrSpec.Linux.Resources.Pids != nil {
+ hostConfig.PidsLimit = ctrSpec.Linux.Resources.Pids.Limit
+ }
+ hostConfig.CgroupConf = ctrSpec.Linux.Resources.Unified
+ if ctrSpec.Linux.Resources.BlockIO != nil {
+ if ctrSpec.Linux.Resources.BlockIO.Weight != nil {
+ hostConfig.BlkioWeight = *ctrSpec.Linux.Resources.BlockIO.Weight
+ }
+ hostConfig.BlkioWeightDevice = []define.InspectBlkioWeightDevice{}
+ for _, dev := range ctrSpec.Linux.Resources.BlockIO.WeightDevice {
+ key := fmt.Sprintf("%d:%d", dev.Major, dev.Minor)
+ // TODO: how do we handle LeafWeight vs
+ // Weight? For now, ignore anything
+ // without Weight set.
+ if dev.Weight == nil {
+ logrus.Infof("Ignoring weight device %s as it lacks a weight", key)
+ continue
+ }
+ if deviceNodes == nil {
+ nodes, err := util.FindDeviceNodes()
+ if err != nil {
+ return err
+ }
+ deviceNodes = nodes
+ }
+ path, ok := deviceNodes[key]
+ if !ok {
+ logrus.Infof("Could not locate weight device %s in system devices", key)
+ continue
+ }
+ weightDev := define.InspectBlkioWeightDevice{}
+ weightDev.Path = path
+ weightDev.Weight = *dev.Weight
+ hostConfig.BlkioWeightDevice = append(hostConfig.BlkioWeightDevice, weightDev)
+ }
+
+ readBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadBpsDevice)
+ if err != nil {
+ return err
+ }
+ hostConfig.BlkioDeviceReadBps = readBps
+
+ writeBps, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteBpsDevice)
+ if err != nil {
+ return err
+ }
+ hostConfig.BlkioDeviceWriteBps = writeBps
+
+ readIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleReadIOPSDevice)
+ if err != nil {
+ return err
+ }
+ hostConfig.BlkioDeviceReadIOps = readIops
+
+ writeIops, err := blkioDeviceThrottle(deviceNodes, ctrSpec.Linux.Resources.BlockIO.ThrottleWriteIOPSDevice)
+ if err != nil {
+ return err
+ }
+ hostConfig.BlkioDeviceWriteIOps = writeIops
+ }
+ }
+ }
+
+ // Cap add and cap drop.
+ // We need a default set of capabilities to compare against.
+ // The OCI generate package has one, and is commonly used, so we'll
+ // use it.
+ // Problem: there are 5 sets of capabilities.
+ // Use the bounding set for this computation; it's the most encompassing
+ // (but still not perfect).
+ capAdd := []string{}
+ capDrop := []string{}
+ // No point in continuing if we got a spec without a Process block...
+ if ctrSpec.Process != nil {
+ // Make an O(1) lookup table for default bounding caps.
+ boundingCaps := make(map[string]bool)
+ g, err := generate.New("linux")
+ if err != nil {
+ return err
+ }
+ if !hostConfig.Privileged {
+ for _, cap := range g.Config.Process.Capabilities.Bounding {
+ boundingCaps[cap] = true
+ }
+ } else {
+ // If we are privileged, use all caps.
+ for _, cap := range capability.List() {
+ if g.HostSpecific && cap > validate.LastCap() {
+ continue
+ }
+ boundingCaps[fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))] = true
+ }
+ }
+ // Iterate through spec caps.
+ // If it's not in default bounding caps, it was added.
+ // If it is, delete from the default set. Whatever remains after
+ // we finish are the dropped caps.
+ for _, cap := range ctrSpec.Process.Capabilities.Bounding {
+ if _, ok := boundingCaps[cap]; ok {
+ delete(boundingCaps, cap)
+ } else {
+ capAdd = append(capAdd, cap)
+ }
+ }
+ for cap := range boundingCaps {
+ capDrop = append(capDrop, cap)
+ }
+ // Sort CapDrop so it displays in consistent order (GH #9490)
+ sort.Strings(capDrop)
+ }
+ hostConfig.CapAdd = capAdd
+ hostConfig.CapDrop = capDrop
+ switch {
+ case c.config.IPCNsCtr != "":
+ hostConfig.IpcMode = fmt.Sprintf("container:%s", c.config.IPCNsCtr)
+ case ctrSpec.Linux != nil:
+ // Locate the spec's IPC namespace.
+ // If there is none, it's ipc=host.
+ // If there is one and it has a path, it's "ns:".
+ // If no path, it's default - the empty string.
+ for _, ns := range ctrSpec.Linux.Namespaces {
+ if ns.Type == spec.IPCNamespace {
+ if ns.Path != "" {
+ hostConfig.IpcMode = fmt.Sprintf("ns:%s", ns.Path)
+ } else {
+ break
+ }
+ }
+ }
+ case c.config.NoShm:
+ hostConfig.IpcMode = "none"
+ case c.config.NoShmShare:
+ hostConfig.IpcMode = "private"
+ }
+ if hostConfig.IpcMode == "" {
+ hostConfig.IpcMode = "shareable"
+ }
+
+ // Cgroup namespace mode
+ cgroupMode := ""
+ if c.config.CgroupNsCtr != "" {
+ cgroupMode = fmt.Sprintf("container:%s", c.config.CgroupNsCtr)
+ } else if ctrSpec.Linux != nil {
+ // Locate the spec's cgroup namespace
+ // If there is none, it's cgroup=host.
+ // If there is one and it has a path, it's "ns:".
+ // If there is no path, it's private.
+ for _, ns := range ctrSpec.Linux.Namespaces {
+ if ns.Type == spec.CgroupNamespace {
+ if ns.Path != "" {
+ cgroupMode = fmt.Sprintf("ns:%s", ns.Path)
+ } else {
+ cgroupMode = "private"
+ }
+ }
+ }
+ if cgroupMode == "" {
+ cgroupMode = "host"
+ }
+ }
+ hostConfig.CgroupMode = cgroupMode
+
+ // Cgroup parent
+ // Need to check if it's the default, and not print if so.
+ defaultCgroupParent := ""
+ switch c.CgroupManager() {
+ case config.CgroupfsCgroupsManager:
+ defaultCgroupParent = CgroupfsDefaultCgroupParent
+ case config.SystemdCgroupsManager:
+ defaultCgroupParent = SystemdDefaultCgroupParent
+ }
+ if c.config.CgroupParent != defaultCgroupParent {
+ hostConfig.CgroupParent = c.config.CgroupParent
+ }
+ hostConfig.CgroupManager = c.CgroupManager()
+
+ // PID namespace mode
+ pidMode := ""
+ if c.config.PIDNsCtr != "" {
+ pidMode = fmt.Sprintf("container:%s", c.config.PIDNsCtr)
+ } else if ctrSpec.Linux != nil {
+ // Locate the spec's PID namespace.
+ // If there is none, it's pid=host.
+ // If there is one and it has a path, it's "ns:".
+ // If there is no path, it's default - the empty string.
+ for _, ns := range ctrSpec.Linux.Namespaces {
+ if ns.Type == spec.PIDNamespace {
+ if ns.Path != "" {
+ pidMode = fmt.Sprintf("ns:%s", ns.Path)
+ } else {
+ pidMode = "private"
+ }
+ break
+ }
+ }
+ if pidMode == "" {
+ pidMode = "host"
+ }
+ }
+ hostConfig.PidMode = pidMode
+
+ // UTS namespace mode
+ utsMode := c.NamespaceMode(spec.UTSNamespace, ctrSpec)
+
+ hostConfig.UTSMode = utsMode
+
+ // User namespace mode
+ usernsMode := ""
+ if c.config.UserNsCtr != "" {
+ usernsMode = fmt.Sprintf("container:%s", c.config.UserNsCtr)
+ } else if ctrSpec.Linux != nil {
+ // Locate the spec's user namespace.
+ // If there is none, it's default - the empty string.
+ // If there is one, it's "private" if no path, or "ns:" if
+ // there's a path.
+
+ for _, ns := range ctrSpec.Linux.Namespaces {
+ if ns.Type == spec.UserNamespace {
+ if ns.Path != "" {
+ usernsMode = fmt.Sprintf("ns:%s", ns.Path)
+ } else {
+ usernsMode = "private"
+ }
+ }
+ }
+ }
+ hostConfig.UsernsMode = usernsMode
+ if c.config.IDMappings.UIDMap != nil && c.config.IDMappings.GIDMap != nil {
+ hostConfig.IDMappings = generateIDMappings(c.config.IDMappings)
+ }
+ // Devices
+ // Do not include if privileged - assumed that all devices will be
+ // included.
+ var err error
+ hostConfig.Devices, err = c.GetDevices(hostConfig.Privileged, *ctrSpec, deviceNodes)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
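The CapAdd/CapDrop derivation in the file above is a plain set difference against the default bounding set: anything in the spec's bounding caps but not in the defaults was added, and whatever remains of the defaults afterwards was dropped. A self-contained sketch of just that computation (the real code sources the defaults from the OCI generate package):

    func capDelta(defaults, bounding []string) (capAdd, capDrop []string) {
        defaultSet := make(map[string]bool, len(defaults))
        for _, c := range defaults {
            defaultSet[c] = true
        }
        for _, c := range bounding {
            if defaultSet[c] {
                delete(defaultSet, c) // in both sets: neither added nor dropped
            } else {
                capAdd = append(capAdd, c)
            }
        }
        for c := range defaultSet {
            capDrop = append(capDrop, c)
        }
        sort.Strings(capDrop) // keep display order stable (GH #9490)
        return capAdd, capDrop
    }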
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index bad68991b..994243805 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -27,10 +27,12 @@ import (
cutil "github.com/containers/common/pkg/util"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events"
+ "github.com/containers/podman/v4/libpod/shutdown"
"github.com/containers/podman/v4/pkg/ctime"
"github.com/containers/podman/v4/pkg/lookup"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/selinux"
+ "github.com/containers/podman/v4/pkg/systemd/notifyproxy"
"github.com/containers/podman/v4/pkg/util"
"github.com/containers/storage"
"github.com/containers/storage/pkg/archive"
@@ -205,7 +207,7 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
}
statusCode, err := strconv.Atoi(string(statusCodeStr))
if err != nil {
- return fmt.Errorf("error converting exit status code (%q, err) for container %s to int: %w",
+ return fmt.Errorf("converting exit status code (%q, err) for container %s to int: %w",
c.ID(), statusCodeStr, err)
}
c.state.ExitCode = int32(statusCode)
@@ -292,20 +294,8 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
}
// set up slirp4netns again because slirp4netns will die when conmon exits
- if c.config.NetMode.IsSlirp4netns() {
- err := c.runtime.setupSlirp4netns(c, c.state.NetNS)
- if err != nil {
- return false, err
- }
- }
-
- // set up rootlesskit port forwarder again since it dies when conmon exits
- // we use rootlesskit port forwarder only as rootless and when bridge network is used
- if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
- err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus)
- if err != nil {
- return false, err
- }
+ if err := c.setupRootlessNetwork(); err != nil {
+ return false, err
}
if c.state.State == define.ContainerStateStopped {
@@ -475,7 +465,7 @@ func (c *Container) setupStorage(ctx context.Context) error {
defOptions, err := storage.GetMountOptions(c.runtime.store.GraphDriverName(), c.runtime.store.GraphOptions())
if err != nil {
- return fmt.Errorf("error getting default mount options: %w", err)
+ return fmt.Errorf("getting default mount options: %w", err)
}
var newOptions []string
for _, opt := range defOptions {
@@ -510,7 +500,7 @@ func (c *Container) setupStorage(ctx context.Context) error {
}
}
if containerInfoErr != nil {
- return fmt.Errorf("error creating container storage: %w", containerInfoErr)
+ return fmt.Errorf("creating container storage: %w", containerInfoErr)
}
// Only reconfig IDMappings if layer was mounted from storage.
@@ -552,7 +542,7 @@ func (c *Container) setupStorage(ctx context.Context) error {
artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
if err := os.MkdirAll(artifacts, 0755); err != nil {
- return fmt.Errorf("error creating artifacts directory: %w", err)
+ return fmt.Errorf("creating artifacts directory: %w", err)
}
return nil
@@ -586,7 +576,7 @@ func (c *Container) teardownStorage() error {
artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
if err := os.RemoveAll(artifacts); err != nil {
- return fmt.Errorf("error removing container %s artifacts %q: %w", c.ID(), artifacts, err)
+ return fmt.Errorf("removing container %s artifacts %q: %w", c.ID(), artifacts, err)
}
if err := c.cleanupStorage(); err != nil {
@@ -603,7 +593,7 @@ func (c *Container) teardownStorage() error {
return nil
}
- return fmt.Errorf("error removing container %s root filesystem: %w", c.ID(), err)
+ return fmt.Errorf("removing container %s root filesystem: %w", c.ID(), err)
}
return nil
@@ -654,7 +644,7 @@ func (c *Container) refresh() error {
// It was lost in the reboot and must be recreated
dir, err := c.runtime.storageService.GetRunDir(c.ID())
if err != nil {
- return fmt.Errorf("error retrieving temporary directory for container %s: %w", c.ID(), err)
+ return fmt.Errorf("retrieving temporary directory for container %s: %w", c.ID(), err)
}
c.state.RunDir = dir
@@ -668,7 +658,7 @@ func (c *Container) refresh() error {
}
root := filepath.Join(c.runtime.config.Engine.TmpDir, "containers-root", c.ID())
if err := os.MkdirAll(root, 0755); err != nil {
- return fmt.Errorf("error creating userNS tmpdir for container %s: %w", c.ID(), err)
+ return fmt.Errorf("creating userNS tmpdir for container %s: %w", c.ID(), err)
}
if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil {
return err
@@ -678,7 +668,7 @@ func (c *Container) refresh() error {
// We need to pick up a new lock
lock, err := c.runtime.lockManager.AllocateAndRetrieveLock(c.config.LockID)
if err != nil {
- return fmt.Errorf("error acquiring lock %d for container %s: %w", c.config.LockID, c.ID(), err)
+ return fmt.Errorf("acquiring lock %d for container %s: %w", c.config.LockID, c.ID(), err)
}
c.lock = lock
@@ -699,7 +689,7 @@ func (c *Container) refresh() error {
}
if err := c.save(); err != nil {
- return fmt.Errorf("error refreshing state for container %s: %w", c.ID(), err)
+ return fmt.Errorf("refreshing state for container %s: %w", c.ID(), err)
}
// Remove ctl and attach files, which may persist across reboot
@@ -720,22 +710,22 @@ func (c *Container) removeConmonFiles() error {
}
if err := os.Remove(attachFile); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("error removing container %s attach file: %w", c.ID(), err)
+ return fmt.Errorf("removing container %s attach file: %w", c.ID(), err)
}
ctlFile := filepath.Join(c.bundlePath(), "ctl")
if err := os.Remove(ctlFile); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("error removing container %s ctl file: %w", c.ID(), err)
+ return fmt.Errorf("removing container %s ctl file: %w", c.ID(), err)
}
winszFile := filepath.Join(c.bundlePath(), "winsz")
if err := os.Remove(winszFile); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("error removing container %s winsz file: %w", c.ID(), err)
+ return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
}
oomFile := filepath.Join(c.bundlePath(), "oom")
if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("error removing container %s OOM file: %w", c.ID(), err)
+ return fmt.Errorf("removing container %s OOM file: %w", c.ID(), err)
}
// Remove the exit file so we don't leak memory in tmpfs
@@ -744,7 +734,7 @@ func (c *Container) removeConmonFiles() error {
return err
}
if err := os.Remove(exitFile); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("error removing container %s exit file: %w", c.ID(), err)
+ return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
}
return nil
@@ -767,12 +757,12 @@ func (c *Container) export(path string) error {
input, err := archive.Tar(mountPoint, archive.Uncompressed)
if err != nil {
- return fmt.Errorf("error reading container directory %q: %w", c.ID(), err)
+ return fmt.Errorf("reading container directory %q: %w", c.ID(), err)
}
outFile, err := os.Create(path)
if err != nil {
- return fmt.Errorf("error creating file %q: %w", path, err)
+ return fmt.Errorf("creating file %q: %w", path, err)
}
defer outFile.Close()
@@ -788,7 +778,7 @@ func (c *Container) getArtifactPath(name string) string {
// save container state to the database
func (c *Container) save() error {
if err := c.runtime.state.SaveContainer(c); err != nil {
- return fmt.Errorf("error saving container %s state: %w", c.ID(), err)
+ return fmt.Errorf("saving container %s state: %w", c.ID(), err)
}
return nil
}
@@ -842,7 +832,7 @@ func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr
func (c *Container) checkDependenciesAndHandleError() error {
notRunning, err := c.checkDependenciesRunning()
if err != nil {
- return fmt.Errorf("error checking dependencies for container %s: %w", c.ID(), err)
+ return fmt.Errorf("checking dependencies for container %s: %w", c.ID(), err)
}
if len(notRunning) > 0 {
depString := strings.Join(notRunning, ",")
@@ -861,7 +851,7 @@ func (c *Container) startDependencies(ctx context.Context) error {
depVisitedCtrs := make(map[string]*Container)
if err := c.getAllDependencies(depVisitedCtrs); err != nil {
- return fmt.Errorf("error starting dependency for container %s: %w", c.ID(), err)
+ return fmt.Errorf("starting dependency for container %s: %w", c.ID(), err)
}
// Because of how Go handles passing slices through functions, a slice cannot grow between function calls
@@ -874,7 +864,7 @@ func (c *Container) startDependencies(ctx context.Context) error {
// Build a dependency graph of containers
graph, err := BuildContainerGraph(depCtrs)
if err != nil {
- return fmt.Errorf("error generating dependency graph for container %s: %w", c.ID(), err)
+ return fmt.Errorf("generating dependency graph for container %s: %w", c.ID(), err)
}
// If there are no containers without dependencies, we can't start
@@ -900,7 +890,7 @@ func (c *Container) startDependencies(ctx context.Context) error {
for _, e := range ctrErrors {
logrus.Errorf("%q", e)
}
- return fmt.Errorf("error starting some containers: %w", define.ErrInternal)
+ return fmt.Errorf("starting some containers: %w", define.ErrInternal)
}
return nil
}
@@ -956,13 +946,13 @@ func (c *Container) checkDependenciesRunning() ([]string, error) {
// Get the dependency container
depCtr, err := c.runtime.state.Container(dep)
if err != nil {
- return nil, fmt.Errorf("error retrieving dependency %s of container %s from state: %w", dep, c.ID(), err)
+ return nil, fmt.Errorf("retrieving dependency %s of container %s from state: %w", dep, c.ID(), err)
}
// Check the status
state, err := depCtr.State()
if err != nil {
- return nil, fmt.Errorf("error retrieving state of dependency %s of container %s: %w", dep, c.ID(), err)
+ return nil, fmt.Errorf("retrieving state of dependency %s of container %s: %w", dep, c.ID(), err)
}
if state != define.ContainerStateRunning && !depCtr.config.IsInfra {
notRunning = append(notRunning, dep)
@@ -1049,6 +1039,13 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
}
}
+ // To ensure that we don't lose track of Conmon if hit by a SIGTERM
+ // in the middle of setting up the container, inhibit shutdown signals
+ // until after we save Conmon's PID to the state.
+ // TODO: This can likely be removed once conmon-rs support merges.
+ shutdown.Inhibit()
+ defer shutdown.Uninhibit()
+
// With the spec complete, do an OCI create
if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil {
return err
@@ -1084,6 +1081,7 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
if err := c.save(); err != nil {
return err
}
+
if c.config.HealthCheckConfig != nil {
if err := c.createTimer(); err != nil {
logrus.Error(err)
@@ -1224,9 +1222,9 @@ func (c *Container) start() error {
payload += "\n"
payload += daemon.SdNotifyReady
}
- if sent, err := daemon.SdNotify(false, payload); err != nil {
+ if err := notifyproxy.SendMessage(c.config.SdNotifySocket, payload); err != nil {
logrus.Errorf("Notifying systemd of Conmon PID: %s", err.Error())
- } else if sent {
+ } else {
logrus.Debugf("Notify sent successfully")
}
}
@@ -1282,12 +1280,18 @@ func (c *Container) stop(timeout uint) error {
// is held when busy-waiting for the container to be stopped.
c.state.State = define.ContainerStateStopping
if err := c.save(); err != nil {
- return fmt.Errorf("error saving container %s state before stopping: %w", c.ID(), err)
+ return fmt.Errorf("saving container %s state before stopping: %w", c.ID(), err)
}
if !c.batched {
c.lock.Unlock()
}
+ if c.config.HealthCheckConfig != nil {
+ if err := c.removeTransientFiles(context.Background()); err != nil {
+ logrus.Error(err.Error())
+ }
+ }
+
stopErr := c.ociRuntime.StopContainer(c, timeout, all)
if !c.batched {
@@ -1342,7 +1346,7 @@ func (c *Container) stop(timeout uint) error {
}
if err := c.save(); err != nil {
- return fmt.Errorf("error saving container %s state after stopping: %w", c.ID(), err)
+ return fmt.Errorf("saving container %s state after stopping: %w", c.ID(), err)
}
// Wait until we have an exit file, and sync once we do
@@ -1556,7 +1560,7 @@ func (c *Container) mountStorage() (_ string, deferredErr error) {
rootUID, rootGID := c.RootUID(), c.RootGID()
- dirfd, err := unix.Open(mountPoint, unix.O_RDONLY|unix.O_PATH, 0)
+ dirfd, err := openDirectory(mountPoint)
if err != nil {
return "", fmt.Errorf("open mount point: %w", err)
}
@@ -1579,7 +1583,7 @@ func (c *Container) mountStorage() (_ string, deferredErr error) {
return "", fmt.Errorf("resolve /etc in the container: %w", err)
}
- etcInTheContainerFd, err := unix.Open(etcInTheContainerPath, unix.O_RDONLY|unix.O_PATH, 0)
+ etcInTheContainerFd, err := openDirectory(etcInTheContainerPath)
if err != nil {
return "", fmt.Errorf("open /etc in the container: %w", err)
}
@@ -1629,7 +1633,7 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
logrus.Debugf("Going to mount named volume %s", v.Name)
vol, err := c.runtime.state.Volume(v.Name)
if err != nil {
- return nil, fmt.Errorf("error retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
+ return nil, fmt.Errorf("retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
}
if vol.config.LockID == c.config.LockID {
@@ -1639,7 +1643,7 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
defer vol.lock.Unlock()
if vol.needsMount() {
if err := vol.mount(); err != nil {
- return nil, fmt.Errorf("error mounting volume %s for container %s: %w", vol.Name(), c.ID(), err)
+ return nil, fmt.Errorf("mounting volume %s for container %s: %w", vol.Name(), c.ID(), err)
}
}
// The volume may need a copy-up. Check the state.
@@ -1652,7 +1656,7 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest)
if err != nil {
- return nil, fmt.Errorf("error calculating destination path to copy up container %s volume %s: %w", c.ID(), vol.Name(), err)
+ return nil, fmt.Errorf("calculating destination path to copy up container %s volume %s: %w", c.ID(), vol.Name(), err)
}
// Do a manual stat on the source directory to verify existence.
// Skip the rest if it exists.
@@ -1663,7 +1667,7 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
// up.
return vol, nil
}
- return nil, fmt.Errorf("error identifying source directory for copy up into volume %s: %w", vol.Name(), err)
+ return nil, fmt.Errorf("identifying source directory for copy up into volume %s: %w", vol.Name(), err)
}
// If it's not a directory we're mounting over it.
if !srcStat.IsDir() {
@@ -1673,9 +1677,9 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
// a bizarre issue where sometimes copier.Get will ENOENT on
// empty directories and sometimes it will not.
// RHBZ#1928643
- srcContents, err := ioutil.ReadDir(srcDir)
+ srcContents, err := os.ReadDir(srcDir)
if err != nil {
- return nil, fmt.Errorf("error reading contents of source directory for copy up into volume %s: %w", vol.Name(), err)
+ return nil, fmt.Errorf("reading contents of source directory for copy up into volume %s: %w", vol.Name(), err)
}
if len(srcContents) == 0 {
return vol, nil
@@ -1683,9 +1687,9 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
// If the volume is not empty, we should not copy up.
volMount := vol.mountPoint()
- contents, err := ioutil.ReadDir(volMount)
+ contents, err := os.ReadDir(volMount)
if err != nil {
- return nil, fmt.Errorf("error listing contents of volume %s mountpoint when copying up from container %s: %w", vol.Name(), c.ID(), err)
+ return nil, fmt.Errorf("listing contents of volume %s mountpoint when copying up from container %s: %w", vol.Name(), c.ID(), err)
}
if len(contents) > 0 {
// The volume is not empty. It was likely modified
@@ -1727,11 +1731,11 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
if err2 != nil {
logrus.Errorf("Streaming contents of container %s directory for volume copy-up: %v", c.ID(), err2)
}
- return nil, fmt.Errorf("error copying up to volume %s: %w", vol.Name(), err)
+ return nil, fmt.Errorf("copying up to volume %s: %w", vol.Name(), err)
}
if err := <-errChan; err != nil {
- return nil, fmt.Errorf("error streaming container content for copy up into volume %s: %w", vol.Name(), err)
+ return nil, fmt.Errorf("streaming container content for copy up into volume %s: %w", vol.Name(), err)
}
}
return vol, nil
@@ -1814,7 +1818,7 @@ func (c *Container) cleanupStorage() error {
if cleanupErr != nil {
logrus.Errorf("Unmounting container %s: %v", c.ID(), cleanupErr)
}
- cleanupErr = fmt.Errorf("error retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
+ cleanupErr = fmt.Errorf("retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
// We need to try and unmount every volume, so continue
// if they fail.
@@ -1827,7 +1831,7 @@ func (c *Container) cleanupStorage() error {
if cleanupErr != nil {
logrus.Errorf("Unmounting container %s: %v", c.ID(), cleanupErr)
}
- cleanupErr = fmt.Errorf("error unmounting volume %s for container %s: %w", vol.Name(), c.ID(), err)
+ cleanupErr = fmt.Errorf("unmounting volume %s for container %s: %w", vol.Name(), c.ID(), err)
}
vol.lock.Unlock()
}
@@ -1852,7 +1856,7 @@ func (c *Container) cleanup(ctx context.Context) error {
// Clean up network namespace, if present
if err := c.cleanupNetwork(); err != nil {
- lastError = fmt.Errorf("error removing container %s network: %w", c.ID(), err)
+ lastError = fmt.Errorf("removing container %s network: %w", c.ID(), err)
}
// cleanup host entry if it is shared
@@ -1890,7 +1894,7 @@ func (c *Container) cleanup(ctx context.Context) error {
if lastError != nil {
logrus.Errorf("Unmounting container %s storage: %v", c.ID(), err)
} else {
- lastError = fmt.Errorf("error unmounting container %s storage: %w", c.ID(), err)
+ lastError = fmt.Errorf("unmounting container %s storage: %w", c.ID(), err)
}
}
@@ -1974,7 +1978,7 @@ func (c *Container) stopPodIfNeeded(ctx context.Context) error {
// hooks.
func (c *Container) delete(ctx context.Context) error {
if err := c.ociRuntime.DeleteContainer(c); err != nil {
- return fmt.Errorf("error removing container %s from runtime: %w", c.ID(), err)
+ return fmt.Errorf("removing container %s from runtime: %w", c.ID(), err)
}
if err := c.postDeleteHooks(ctx); err != nil {
@@ -2037,7 +2041,7 @@ func (c *Container) writeStringToRundir(destFile, contents string) (string, erro
destFileName := filepath.Join(c.state.RunDir, destFile)
if err := os.Remove(destFileName); err != nil && !os.IsNotExist(err) {
- return "", fmt.Errorf("error removing %s for container %s: %w", destFile, c.ID(), err)
+ return "", fmt.Errorf("removing %s for container %s: %w", destFile, c.ID(), err)
}
if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil {
@@ -2071,22 +2075,22 @@ func (c *Container) saveSpec(spec *spec.Spec) error {
jsonPath := filepath.Join(c.bundlePath(), "config.json")
if _, err := os.Stat(jsonPath); err != nil {
if !os.IsNotExist(err) {
- return fmt.Errorf("error doing stat on container %s spec: %w", c.ID(), err)
+ return fmt.Errorf("doing stat on container %s spec: %w", c.ID(), err)
}
// The spec does not exist, we're fine
} else {
// The spec exists, need to remove it
if err := os.Remove(jsonPath); err != nil {
- return fmt.Errorf("error replacing runtime spec for container %s: %w", c.ID(), err)
+ return fmt.Errorf("replacing runtime spec for container %s: %w", c.ID(), err)
}
}
fileJSON, err := json.Marshal(spec)
if err != nil {
- return fmt.Errorf("error exporting runtime spec for container %s to JSON: %w", c.ID(), err)
+ return fmt.Errorf("exporting runtime spec for container %s to JSON: %w", c.ID(), err)
}
if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil {
- return fmt.Errorf("error writing runtime spec JSON for container %s to disk: %w", c.ID(), err)
+ return fmt.Errorf("writing runtime spec JSON for container %s to disk: %w", c.ID(), err)
}
logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath)
@@ -2154,11 +2158,11 @@ func (c *Container) mount() (string, error) {
mountPoint, err := c.runtime.storageService.MountContainerImage(c.ID())
if err != nil {
- return "", fmt.Errorf("error mounting storage for container %s: %w", c.ID(), err)
+ return "", fmt.Errorf("mounting storage for container %s: %w", c.ID(), err)
}
mountPoint, err = filepath.EvalSymlinks(mountPoint)
if err != nil {
- return "", fmt.Errorf("error resolving storage path for container %s: %w", c.ID(), err)
+ return "", fmt.Errorf("resolving storage path for container %s: %w", c.ID(), err)
}
if err := os.Chown(mountPoint, c.RootUID(), c.RootGID()); err != nil {
return "", fmt.Errorf("cannot chown %s to %d:%d: %w", mountPoint, c.RootUID(), c.RootGID(), err)
@@ -2170,7 +2174,7 @@ func (c *Container) mount() (string, error) {
func (c *Container) unmount(force bool) error {
// Also unmount storage
if _, err := c.runtime.storageService.UnmountContainerImage(c.ID(), force); err != nil {
- return fmt.Errorf("error unmounting container %s root filesystem: %w", c.ID(), err)
+ return fmt.Errorf("unmounting container %s root filesystem: %w", c.ID(), err)
}
return nil
@@ -2299,7 +2303,7 @@ func (c *Container) checkExitFile() error {
return nil
}
- return fmt.Errorf("error running stat on container %s exit file: %w", c.ID(), err)
+ return fmt.Errorf("running stat on container %s exit file: %w", c.ID(), err)
}
// Alright, it exists. Transition to Stopped state.
@@ -2354,3 +2358,12 @@ func (c *Container) extractSecretToCtrStorage(secr *ContainerSecret) error {
}
return nil
}
+
+// update calls the ociRuntime's update function to modify the cgroup config after container creation.
+func (c *Container) update(resources *spec.LinuxResources) error {
+ if err := c.ociRuntime.UpdateContainer(c, resources); err != nil {
+ return err
+ }
+ logrus.Debugf("updated container %s", c.ID())
+ return nil
+}
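The start() change above sends the MAINPID/READY payload through notifyproxy.SendMessage with an explicit socket path, rather than letting daemon.SdNotify resolve NOTIFY_SOCKET from the environment. Mechanically, sd_notify is just a datagram write to an AF_UNIX socket; a minimal sketch of that mechanism (not the notifyproxy implementation itself):

    func sendNotify(socketPath, payload string) error {
        conn, err := net.Dial("unixgram", socketPath) // sd_notify sockets are SOCK_DGRAM
        if err != nil {
            return err
        }
        defer conn.Close()
        _, err = conn.Write([]byte(payload)) // e.g. "MAINPID=1234\nREADY=1"
        return err
    }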
diff --git a/libpod/container_internal_common.go b/libpod/container_internal_common.go
new file mode 100644
index 000000000..9c4a3bb67
--- /dev/null
+++ b/libpod/container_internal_common.go
@@ -0,0 +1,2694 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "math"
+ "os"
+ "os/user"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+ "time"
+
+ metadata "github.com/checkpoint-restore/checkpointctl/lib"
+ "github.com/checkpoint-restore/go-criu/v5/stats"
+ cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
+ "github.com/containers/buildah"
+ "github.com/containers/buildah/pkg/chrootuser"
+ "github.com/containers/buildah/pkg/overlay"
+ butil "github.com/containers/buildah/util"
+ "github.com/containers/common/libnetwork/etchosts"
+ "github.com/containers/common/libnetwork/resolvconf"
+ "github.com/containers/common/libnetwork/types"
+ "github.com/containers/common/pkg/apparmor"
+ "github.com/containers/common/pkg/chown"
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/subscriptions"
+ "github.com/containers/common/pkg/umask"
+ cutil "github.com/containers/common/pkg/util"
+ is "github.com/containers/image/v5/storage"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/libpod/events"
+ "github.com/containers/podman/v4/pkg/annotations"
+ "github.com/containers/podman/v4/pkg/checkpoint/crutils"
+ "github.com/containers/podman/v4/pkg/criu"
+ "github.com/containers/podman/v4/pkg/lookup"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/containers/podman/v4/pkg/util"
+ "github.com/containers/podman/v4/version"
+ "github.com/containers/storage/pkg/archive"
+ "github.com/containers/storage/pkg/idtools"
+ "github.com/containers/storage/pkg/lockfile"
+ securejoin "github.com/cyphar/filepath-securejoin"
+ runcuser "github.com/opencontainers/runc/libcontainer/user"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/sirupsen/logrus"
+)
+
+// Internal only function which returns upper and work dir from
+// overlay options.
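+// For example, the options ["O", "upperdir=/tmp/upper", "workdir=/tmp/work"]
+// yield ("/tmp/upper", "/tmp/work", nil); supplying only one of the two
+// directories is an error.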
+func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
+ upperDir := ""
+ workDir := ""
+ for _, o := range options {
+ if strings.HasPrefix(o, "upperdir") {
+ splitOpt := strings.SplitN(o, "=", 2)
+ if len(splitOpt) > 1 {
+ upperDir = splitOpt[1]
+ if upperDir == "" {
+ return "", "", errors.New("cannot accept empty value for upperdir")
+ }
+ }
+ }
+ if strings.HasPrefix(o, "workdir") {
+ splitOpt := strings.SplitN(o, "=", 2)
+ if len(splitOpt) > 1 {
+ workDir = splitOpt[1]
+ if workDir == "" {
+ return "", "", errors.New("cannot accept empty value for workdir")
+ }
+ }
+ }
+ }
+ if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") {
+ return "", "", errors.New("must specify both upperdir and workdir")
+ }
+ return upperDir, workDir, nil
+}
+
+// Generate the OCI runtime spec for a container.
+func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
+ overrides := c.getUserOverrides()
+ execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
+ if err != nil {
+ if cutil.StringInSlice(c.config.User, c.config.HostUsers) {
+ execUser, err = lookupHostUser(c.config.User)
+ }
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ // NewFromSpec() is deprecated according to its comment,
+ // but the recommended replacement currently causes a nil map panic.
+ //nolint:staticcheck
+ g := generate.NewFromSpec(c.config.Spec)
+
+ // If the flag to mount all devices is set for a privileged container, add
+ // all the devices from the host's machine into the container
+ if c.config.MountAllDevices {
+ if err := util.AddPrivilegedDevices(&g); err != nil {
+ return nil, err
+ }
+ }
+
+ // If network namespace was requested, add it now
+ if err := c.addNetworkNamespace(&g); err != nil {
+ return nil, err
+ }
+
+ // Apply AppArmor checks and load the default profile if needed.
+ if len(c.config.Spec.Process.ApparmorProfile) > 0 {
+ updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
+ if err != nil {
+ return nil, err
+ }
+ g.SetProcessApparmorProfile(updatedProfile)
+ }
+
+ if err := c.makeBindMounts(); err != nil {
+ return nil, err
+ }
+
+ if err := c.mountNotifySocket(g); err != nil {
+ return nil, err
+ }
+
+ // Get host UID and GID based on the container process UID and GID.
+ hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
+ if err != nil {
+ return nil, err
+ }
+
+ // Add named volumes
+ for _, namedVol := range c.config.NamedVolumes {
+ volume, err := c.runtime.GetVolume(namedVol.Name)
+ if err != nil {
+ return nil, fmt.Errorf("retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
+ }
+ mountPoint, err := volume.MountPoint()
+ if err != nil {
+ return nil, err
+ }
+
+ overlayFlag := false
+ upperDir := ""
+ workDir := ""
+ for _, o := range namedVol.Options {
+ if o == "O" {
+ overlayFlag = true
+ upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ if overlayFlag {
+ var overlayMount spec.Mount
+ var overlayOpts *overlay.Options
+ contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+ if err != nil {
+ return nil, err
+ }
+
+ overlayOpts = &overlay.Options{RootUID: c.RootUID(),
+ RootGID: c.RootGID(),
+ UpperDirOptionFragment: upperDir,
+ WorkDirOptionFragment: workDir,
+ GraphOpts: c.runtime.store.GraphOptions(),
+ }
+
+ overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
+ if err != nil {
+ return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
+ }
+
+ for _, o := range namedVol.Options {
+ if o == "U" {
+ if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+
+ if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+ }
+ }
+ g.AddMount(overlayMount)
+ } else {
+ volMount := spec.Mount{
+ Type: define.TypeBind,
+ Source: mountPoint,
+ Destination: namedVol.Dest,
+ Options: namedVol.Options,
+ }
+ g.AddMount(volMount)
+ }
+ }
+
+ // Check if the spec file mounts contain the options z, Z or U.
+ // If they have z or Z, relabel the source directory and then remove the option.
+ // If they have U, chown the source directory and then remove the option.
+ for i := range g.Config.Mounts {
+ m := &g.Config.Mounts[i]
+ var options []string
+ for _, o := range m.Options {
+ switch o {
+ case "U":
+ if m.Type == "tmpfs" {
+ options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
+ } else {
+ // only chown on initial creation of container
+ if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+ }
+ case "z":
+ fallthrough
+ case "Z":
+ if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
+ return nil, err
+ }
+
+ default:
+ options = append(options, o)
+ }
+ }
+ m.Options = options
+ }
+
+ c.setProcessLabel(&g)
+ c.setMountLabel(&g)
+
+ // Add bind mounts to container
+ for dstPath, srcPath := range c.state.BindMounts {
+ newMount := spec.Mount{
+ Type: define.TypeBind,
+ Source: srcPath,
+ Destination: dstPath,
+ Options: bindOptions,
+ }
+ if c.IsReadOnly() && dstPath != "/dev/shm" {
+ newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
+ }
+ if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+ newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
+ }
+ if !MountExists(g.Mounts(), dstPath) {
+ g.AddMount(newMount)
+ } else {
+ logrus.Infof("User mount overriding libpod mount at %q", dstPath)
+ }
+ }
+
+ // Add overlay volumes
+ for _, overlayVol := range c.config.OverlayVolumes {
+ upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
+ if err != nil {
+ return nil, err
+ }
+ contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+ if err != nil {
+ return nil, err
+ }
+ overlayOpts := &overlay.Options{RootUID: c.RootUID(),
+ RootGID: c.RootGID(),
+ UpperDirOptionFragment: upperDir,
+ WorkDirOptionFragment: workDir,
+ GraphOpts: c.runtime.store.GraphOptions(),
+ }
+
+ overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
+ if err != nil {
+ return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
+ }
+
+ // Check overlay volume options
+ for _, o := range overlayVol.Options {
+ if o == "U" {
+ if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+
+ if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ g.AddMount(overlayMount)
+ }
+
+ // Add image volumes as overlay mounts
+ for _, volume := range c.config.ImageVolumes {
+ // Mount the specified image.
+ img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
+ if err != nil {
+ return nil, fmt.Errorf("creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
+ }
+ mountPoint, err := img.Mount(ctx, nil, "")
+ if err != nil {
+ return nil, fmt.Errorf("mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
+ }
+
+ contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
+ if err != nil {
+ return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
+ }
+
+ var overlayMount spec.Mount
+ if volume.ReadWrite {
+ overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
+ } else {
+ overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
+ }
+ if err != nil {
+ return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
+ }
+ g.AddMount(overlayMount)
+ }
+
+ hasHomeSet := false
+ for _, s := range c.config.Spec.Process.Env {
+ if strings.HasPrefix(s, "HOME=") {
+ hasHomeSet = true
+ break
+ }
+ }
+ if !hasHomeSet && execUser.Home != "" {
+ c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
+ }
+
+ if c.config.User != "" {
+ // User and Group must go together
+ g.SetProcessUID(uint32(execUser.Uid))
+ g.SetProcessGID(uint32(execUser.Gid))
+ g.AddProcessAdditionalGid(uint32(execUser.Gid))
+ }
+
+ if c.config.Umask != "" {
+ decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
+ if err != nil {
+ return nil, fmt.Errorf("invalid Umask Value: %w", err)
+ }
+ umask := uint32(decVal)
+ g.Config.Process.User.Umask = &umask
+ }
+
+ // Add additional groups if c.config.Groups is not empty
+ if len(c.config.Groups) > 0 {
+ gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
+ if err != nil {
+ return nil, fmt.Errorf("looking up supplemental groups for container %s: %w", c.ID(), err)
+ }
+ for _, gid := range gids {
+ g.AddProcessAdditionalGid(gid)
+ }
+ }
+
+ if err := c.addSystemdMounts(&g); err != nil {
+ return nil, err
+ }
+
+ // Look up and add groups the user belongs to, if a group wasn't directly specified
+ if !strings.Contains(c.config.User, ":") {
+ // the gidMappings that are present inside the container user namespace
+ var gidMappings []idtools.IDMap
+
+ switch {
+ case len(c.config.IDMappings.GIDMap) > 0:
+ gidMappings = c.config.IDMappings.GIDMap
+ case rootless.IsRootless():
+ // Check whether the current user namespace has enough gids available.
+ availableGids, err := rootless.GetAvailableGids()
+ if err != nil {
+ return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
+ }
+ gidMappings = []idtools.IDMap{{
+ ContainerID: 0,
+ HostID: 0,
+ Size: int(availableGids),
+ }}
+ default:
+ gidMappings = []idtools.IDMap{{
+ ContainerID: 0,
+ HostID: 0,
+ Size: math.MaxInt32,
+ }}
+ }
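+ // A supplemental GID can be applied only if it is mapped into the
+ // container's user namespace; e.g. with a single mapping of
+ // {ContainerID: 0, Size: 1000}, gid 5000 is skipped with a warning below.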
+ for _, gid := range execUser.Sgids {
+ isGIDAvailable := false
+ for _, m := range gidMappings {
+ if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
+ isGIDAvailable = true
+ break
+ }
+ }
+ if isGIDAvailable {
+ g.AddProcessAdditionalGid(uint32(gid))
+ } else {
+ logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
+ }
+ }
+ }
+
+ // Add shared namespaces from other containers
+ if err := c.addSharedNamespaces(&g); err != nil {
+ return nil, err
+ }
+
+ g.SetRootPath(c.state.Mountpoint)
+ g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
+ g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
+
+ if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
+ g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
+ }
+
+ if err := c.setCgroupsPath(&g); err != nil {
+ return nil, err
+ }
+
+ // Warning: CDI may alter g.Config in place.
+ if len(c.config.CDIDevices) > 0 {
+ registry := cdi.GetRegistry(
+ cdi.WithAutoRefresh(false),
+ )
+ if err := registry.Refresh(); err != nil {
+ logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
+ }
+ _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
+ if err != nil {
+ return nil, fmt.Errorf("setting up CDI devices: %w", err)
+ }
+ }
+
+ // Mounts need to be sorted so paths will not cover other paths
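+ // (e.g. /foo must be mounted before /foo/bar, otherwise mounting /foo
+ // afterwards would hide /foo/bar).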
+ mounts := sortMounts(g.Mounts())
+ g.ClearMounts()
+
+ for _, m := range mounts {
+ // We need to remove all symlinks from tmpfs mounts.
+ // Runc and other runtimes may choke on them.
+ // Easy solution: use securejoin to do a scoped evaluation of
+ // the links, then trim off the mount prefix.
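+ // For example, with mountpoint /vol/root and a tmpfs destination
+ // /tmp/foo where the container's /tmp is a symlink to /var/tmp, the
+ // destination becomes /var/tmp/foo (assuming the link stays inside the root).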
+ if m.Type == "tmpfs" {
+ finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
+ if err != nil {
+ return nil, fmt.Errorf("resolving symlinks for mount destination %s: %w", m.Destination, err)
+ }
+ trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
+ m.Destination = trimmedPath
+ }
+ g.AddMount(m)
+ }
+
+ if err := c.addRootPropagation(&g, mounts); err != nil {
+ return nil, err
+ }
+
+ // Warning: precreate hooks may alter g.Config in place.
+ if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
+ return nil, fmt.Errorf("setting up OCI Hooks: %w", err)
+ }
+ if len(c.config.EnvSecrets) > 0 {
+ manager, err := c.runtime.SecretsManager()
+ if err != nil {
+ return nil, err
+ }
+ for name, secr := range c.config.EnvSecrets {
+ _, data, err := manager.LookupSecretData(secr.Name)
+ if err != nil {
+ return nil, err
+ }
+ g.AddProcessEnv(name, string(data))
+ }
+ }
+
+ // Pass down the LISTEN_* environment (see #10443).
+ for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
+ if val, ok := os.LookupEnv(key); ok {
+ // Force the PID to `1` since we cannot rely on (all
+ // versions of) all runtimes to do it for us.
+ if key == "LISTEN_PID" {
+ val = "1"
+ }
+ g.AddProcessEnv(key, val)
+ }
+ }
+
+ return g.Config, nil
+}
+
+// isWorkDirSymlink returns true if the resolved workdir is a symlink (or a
+// chain of symlinks) whose final target is present on a volume, on a mount,
+// or inside the container; otherwise it returns false. This function is meant
+// for internal use only and can change at any time.
+func (c *Container) isWorkDirSymlink(resolvedPath string) bool {
+ // We cannot create the workdir since an explicit --workdir is
+ // set in the config, but the workdir could also be a symlink.
+ // If it's a symlink, check if the resolved target is present in the container.
+ // If so, that's a valid use case: return true.
+
+ symLinks := 0
+ for {
+ // Linux only supports a chain of 40 links.
+ // Reference: https://github.com/torvalds/linux/blob/master/include/linux/namei.h#L13
+ if symLinks > 40 {
+ break
+ }
+ resolvedSymlink, err := os.Readlink(resolvedPath)
+ if err != nil {
+ // End sym-link resolution loop.
+ break
+ }
+ if resolvedSymlink != "" {
+ _, resolvedSymlinkWorkdir, err := c.resolvePath(c.state.Mountpoint, resolvedSymlink)
+ if isPathOnVolume(c, resolvedSymlinkWorkdir) || isPathOnMount(c, resolvedSymlinkWorkdir) {
+ // Resolved symlink exists on external volume or mount
+ return true
+ }
+ if err != nil {
+ // Could not resolve path so end sym-link resolution loop.
+ break
+ }
+ if resolvedSymlinkWorkdir != "" {
+ resolvedPath = resolvedSymlinkWorkdir
+ _, err := os.Stat(resolvedSymlinkWorkdir)
+ if err == nil {
+ // Symlink resolved successfully and the resolved path exists in the container;
+ // this is a valid use case, so return true.
+ logrus.Debugf("Workdir is a symlink with target to %q and resolved symlink exists on container", resolvedSymlink)
+ return true
+ }
+ }
+ }
+ symLinks++
+ }
+ return false
+}
+
+// resolveWorkDir resolves the container's workdir and, depending on the
+// configuration, will create it, or error out if it does not exist.
+// Note that the container must be mounted before.
+func (c *Container) resolveWorkDir() error {
+ workdir := c.WorkingDir()
+
+ // If the specified workdir is a subdir of a volume or mount,
+ // we don't need to do anything. The runtime is taking care of
+ // that.
+ if isPathOnVolume(c, workdir) || isPathOnMount(c, workdir) {
+ logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
+ return nil
+ }
+
+ _, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir)
+ if err != nil {
+ return err
+ }
+ logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir)
+
+ st, err := os.Stat(resolvedWorkdir)
+ if err == nil {
+ if !st.IsDir() {
+ return fmt.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
+ }
+ return nil
+ }
+ if !c.config.CreateWorkingDir {
+ // No need to create it (e.g., `--workdir=/foo`), so let's make sure
+ // the path exists on the container.
+ if err != nil {
+ if os.IsNotExist(err) {
+ // If the resolved workdir is a valid symlink into the
+ // container, this is a valid use case: return nil.
+ if c.isWorkDirSymlink(resolvedWorkdir) {
+ return nil
+ }
+ return fmt.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
+ }
+ // This might be a serious error (e.g., permission), so
+ // we need to return the full error.
+ return fmt.Errorf("detecting workdir %q on container %s: %w", workdir, c.ID(), err)
+ }
+ return nil
+ }
+ if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil {
+ if os.IsExist(err) {
+ return nil
+ }
+ return fmt.Errorf("creating container %s workdir: %w", c.ID(), err)
+ }
+
+ // Make sure the newly created workdir is owned by the user the container runs as.
+ uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
+ if err != nil {
+ return fmt.Errorf("looking up %s inside of the container %s: %w", c.User(), c.ID(), err)
+ }
+ if err := os.Chown(resolvedWorkdir, int(uid), int(gid)); err != nil {
+ return fmt.Errorf("chowning container %s workdir to container root: %w", c.ID(), err)
+ }
+
+ return nil
+}
+
+func (c *Container) getUserOverrides() *lookup.Overrides {
+ var hasPasswdFile, hasGroupFile bool
+ overrides := lookup.Overrides{}
+ for _, m := range c.config.Spec.Mounts {
+ if m.Destination == "/etc/passwd" {
+ overrides.ContainerEtcPasswdPath = m.Source
+ hasPasswdFile = true
+ }
+ if m.Destination == "/etc/group" {
+ overrides.ContainerEtcGroupPath = m.Source
+ hasGroupFile = true
+ }
+ if m.Destination == "/etc" {
+ if !hasPasswdFile {
+ overrides.ContainerEtcPasswdPath = filepath.Join(m.Source, "passwd")
+ }
+ if !hasGroupFile {
+ overrides.ContainerEtcGroupPath = filepath.Join(m.Source, "group")
+ }
+ }
+ }
+ if path, ok := c.state.BindMounts["/etc/passwd"]; ok {
+ overrides.ContainerEtcPasswdPath = path
+ }
+ return &overrides
+}
+
+func lookupHostUser(name string) (*runcuser.ExecUser, error) {
+ var execUser runcuser.ExecUser
+ // Look up User on host
+ u, err := util.LookupUser(name)
+ if err != nil {
+ return &execUser, err
+ }
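+ // UID/GID strings from the host's user database are decimal.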
+ uid, err := strconv.ParseUint(u.Uid, 10, 32)
+ if err != nil {
+ return &execUser, err
+ }
+
+ gid, err := strconv.ParseUint(u.Gid, 10, 32)
+ if err != nil {
+ return &execUser, err
+ }
+ execUser.Uid = int(uid)
+ execUser.Gid = int(gid)
+ execUser.Home = u.HomeDir
+ return &execUser, nil
+}
+
+// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
+// and if the sdnotify mode is set to container.
+func (c *Container) mountNotifySocket(g generate.Generator) error {
+ if c.config.SdNotifySocket == "" {
+ return nil
+ }
+ if c.config.SdNotifyMode != define.SdNotifyModeContainer {
+ return nil
+ }
+
+ notifyDir := filepath.Join(c.bundlePath(), "notify")
+ logrus.Debugf("Checking notify %q dir", notifyDir)
+ if err := os.MkdirAll(notifyDir, 0755); err != nil {
+ if !os.IsExist(err) {
+ return fmt.Errorf("unable to create notify %q dir: %w", notifyDir, err)
+ }
+ }
+ if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil {
+ return fmt.Errorf("relabel failed %q: %w", notifyDir, err)
+ }
+ logrus.Debugf("Add bindmount notify %q dir", notifyDir)
+ if _, ok := c.state.BindMounts["/run/notify"]; !ok {
+ c.state.BindMounts["/run/notify"] = notifyDir
+ }
+
+ // Set the container's notify socket to the proxy socket created by conmon
+ g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
+
+ return nil
+}
+
+func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) error {
+ // Get information about host environment
+ hostInfo, err := c.Runtime().hostInfo()
+ if err != nil {
+ return fmt.Errorf("getting host info: %v", err)
+ }
+
+ criuVersion, err := criu.GetCriuVersion()
+ if err != nil {
+ return fmt.Errorf("getting criu version: %v", err)
+ }
+
+ rootfsImageID, rootfsImageName := c.Image()
+
+ // Add image annotations with information about the container and the host.
+ // This information is useful to check compatibility before restoring the checkpoint
+
+ checkpointImageAnnotations := map[string]string{
+ define.CheckpointAnnotationName: c.config.Name,
+ define.CheckpointAnnotationRawImageName: c.config.RawImageName,
+ define.CheckpointAnnotationRootfsImageID: rootfsImageID,
+ define.CheckpointAnnotationRootfsImageName: rootfsImageName,
+ define.CheckpointAnnotationPodmanVersion: version.Version.String(),
+ define.CheckpointAnnotationCriuVersion: strconv.Itoa(criuVersion),
+ define.CheckpointAnnotationRuntimeName: hostInfo.OCIRuntime.Name,
+ define.CheckpointAnnotationRuntimeVersion: hostInfo.OCIRuntime.Version,
+ define.CheckpointAnnotationConmonVersion: hostInfo.Conmon.Version,
+ define.CheckpointAnnotationHostArch: hostInfo.Arch,
+ define.CheckpointAnnotationHostKernel: hostInfo.Kernel,
+ define.CheckpointAnnotationCgroupVersion: hostInfo.CgroupsVersion,
+ define.CheckpointAnnotationDistributionVersion: hostInfo.Distribution.Version,
+ define.CheckpointAnnotationDistributionName: hostInfo.Distribution.Distribution,
+ }
+
+ for key, value := range checkpointImageAnnotations {
+ importBuilder.SetAnnotation(key, value)
+ }
+
+ return nil
+}
+
+func (c *Container) resolveCheckpointImageName(options *ContainerCheckpointOptions) error {
+ if options.CreateImage == "" {
+ return nil
+ }
+
+ // Resolve image name
+ resolvedImageName, err := c.runtime.LibimageRuntime().ResolveName(options.CreateImage)
+ if err != nil {
+ return err
+ }
+
+ options.CreateImage = resolvedImageName
+ return nil
+}
+
+func (c *Container) createCheckpointImage(ctx context.Context, options ContainerCheckpointOptions) error {
+ if options.CreateImage == "" {
+ return nil
+ }
+ logrus.Debugf("Create checkpoint image %s", options.CreateImage)
+
+ // Create storage reference
+ imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, options.CreateImage)
+ if err != nil {
+ return errors.New("failed to parse image name")
+ }
+
+ // Build an image scratch
+ builderOptions := buildah.BuilderOptions{
+ FromImage: "scratch",
+ }
+ importBuilder, err := buildah.NewBuilder(ctx, c.runtime.store, builderOptions)
+ if err != nil {
+ return err
+ }
+ // Clean up buildah working container
+ defer func() {
+ if err := importBuilder.Delete(); err != nil {
+ logrus.Errorf("Image builder delete failed: %v", err)
+ }
+ }()
+
+ if err := c.prepareCheckpointExport(); err != nil {
+ return err
+ }
+
+ // Export checkpoint into temporary tar file
+ tmpDir, err := ioutil.TempDir("", "checkpoint_image_")
+ if err != nil {
+ return err
+ }
+ defer os.RemoveAll(tmpDir)
+
+ options.TargetFile = path.Join(tmpDir, "checkpoint.tar")
+
+ if err := c.exportCheckpoint(options); err != nil {
+ return err
+ }
+
+ // Copy the checkpoint from the temporary tar file into the image
+ addAndCopyOptions := buildah.AddAndCopyOptions{}
+ if err := importBuilder.Add("", true, addAndCopyOptions, options.TargetFile); err != nil {
+ return err
+ }
+
+ if err := c.addCheckpointImageMetadata(importBuilder); err != nil {
+ return err
+ }
+
+ commitOptions := buildah.CommitOptions{
+ Squash: true,
+ SystemContext: c.runtime.imageContext,
+ }
+
+ // Create checkpoint image
+ id, _, _, err := importBuilder.Commit(ctx, imageRef, commitOptions)
+ if err != nil {
+ return err
+ }
+ logrus.Debugf("Created checkpoint image: %s", id)
+ return nil
+}
+
+func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
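+ // Containers with dependencies cannot be exported; the only exception is
+ // a dependency on the container's own pod infra container, which the
+ // restore side recreates anyway.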
+ if len(c.Dependencies()) == 1 {
+ // Check if the dependency is an infra container. If it is, we can
+ // checkpoint the container out of the pod.
+ if c.config.Pod == "" {
+ return errors.New("cannot export checkpoints of containers with dependencies")
+ }
+
+ pod, err := c.runtime.state.Pod(c.config.Pod)
+ if err != nil {
+ return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+ }
+ infraID, err := pod.InfraContainerID()
+ if err != nil {
+ return fmt.Errorf("cannot retrieve infra container ID for pod %s: %w", c.config.Pod, err)
+ }
+ if c.Dependencies()[0] != infraID {
+ return errors.New("cannot export checkpoints of containers with dependencies")
+ }
+ }
+ if len(c.Dependencies()) > 1 {
+ return errors.New("cannot export checkpoints of containers with dependencies")
+ }
+ logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
+
+ includeFiles := []string{
+ "artifacts",
+ metadata.DevShmCheckpointTar,
+ metadata.ConfigDumpFile,
+ metadata.SpecDumpFile,
+ metadata.NetworkStatusFile,
+ stats.StatsDump,
+ }
+
+ if c.LogDriver() == define.KubernetesLogging ||
+ c.LogDriver() == define.JSONLogging {
+ includeFiles = append(includeFiles, "ctr.log")
+ }
+ if options.PreCheckPoint {
+ includeFiles = append(includeFiles, preCheckpointDir)
+ } else {
+ includeFiles = append(includeFiles, metadata.CheckpointDirectory)
+ }
+ // Get root file-system changes included in the checkpoint archive
+ var addToTarFiles []string
+ if !options.IgnoreRootfs {
+ // To correctly track deleted files, let's go through the output of 'podman diff'
+ rootFsChanges, err := c.runtime.GetDiff("", c.ID(), define.DiffContainer)
+ if err != nil {
+ return fmt.Errorf("exporting root file-system diff for %q: %w", c.ID(), err)
+ }
+
+ addToTarFiles, err = crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
+ if err != nil {
+ return err
+ }
+
+ includeFiles = append(includeFiles, addToTarFiles...)
+ }
+
+ // Folder containing archived volumes that will be included in the export
+ expVolDir := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
+
+ // Create an archive for each volume associated with the container
+ if !options.IgnoreVolumes {
+ if err := os.MkdirAll(expVolDir, 0700); err != nil {
+ return fmt.Errorf("creating volumes export directory %q: %w", expVolDir, err)
+ }
+
+ for _, v := range c.config.NamedVolumes {
+ volumeTarFilePath := filepath.Join(metadata.CheckpointVolumesDirectory, v.Name+".tar")
+ volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)
+
+ volumeTarFile, err := os.Create(volumeTarFileFullPath)
+ if err != nil {
+ return fmt.Errorf("creating %q: %w", volumeTarFileFullPath, err)
+ }
+
+ volume, err := c.runtime.GetVolume(v.Name)
+ if err != nil {
+ return err
+ }
+
+ mp, err := volume.MountPoint()
+ if err != nil {
+ return err
+ }
+ if mp == "" {
+ return fmt.Errorf("volume %s is not mounted, cannot export: %w", volume.Name(), define.ErrInternal)
+ }
+
+ input, err := archive.TarWithOptions(mp, &archive.TarOptions{
+ Compression: archive.Uncompressed,
+ IncludeSourceDir: true,
+ })
+ if err != nil {
+ return fmt.Errorf("reading volume directory %q: %w", v.Dest, err)
+ }
+
+ _, err = io.Copy(volumeTarFile, input)
+ if err != nil {
+ return err
+ }
+ volumeTarFile.Close()
+
+ includeFiles = append(includeFiles, volumeTarFilePath)
+ }
+ }
+
+ input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
+ Compression: options.Compression,
+ IncludeSourceDir: true,
+ IncludeFiles: includeFiles,
+ })
+
+ if err != nil {
+ return fmt.Errorf("reading checkpoint directory %q: %w", c.ID(), err)
+ }
+
+ outFile, err := os.Create(options.TargetFile)
+ if err != nil {
+ return fmt.Errorf("creating checkpoint export file %q: %w", options.TargetFile, err)
+ }
+ defer outFile.Close()
+
+ if err := os.Chmod(options.TargetFile, 0600); err != nil {
+ return err
+ }
+
+ _, err = io.Copy(outFile, input)
+ if err != nil {
+ return err
+ }
+
+ for _, file := range addToTarFiles {
+ os.Remove(filepath.Join(c.bundlePath(), file))
+ }
+
+ if !options.IgnoreVolumes {
+ os.RemoveAll(expVolDir)
+ }
+
+ return nil
+}
+
+func (c *Container) checkpointRestoreSupported(version int) error {
+ if !criu.CheckForCriu(version) {
+ return fmt.Errorf("checkpoint/restore requires at least CRIU %d", version)
+ }
+ if !c.ociRuntime.SupportsCheckpoint() {
+ return errors.New("configured runtime does not support checkpoint/restore")
+ }
+ return nil
+}
+
+func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
+ if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
+ return nil, 0, err
+ }
+
+ if c.state.State != define.ContainerStateRunning {
+ return nil, 0, fmt.Errorf("%q is not running, cannot checkpoint: %w", c.state.State, define.ErrCtrStateInvalid)
+ }
+
+ if c.AutoRemove() && options.TargetFile == "" {
+ return nil, 0, errors.New("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
+ }
+
+ if err := c.resolveCheckpointImageName(&options); err != nil {
+ return nil, 0, err
+ }
+
+ if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
+ return nil, 0, err
+ }
+
+ // Setting CheckpointLog early in case there is a failure.
+ c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
+ c.state.CheckpointPath = c.CheckpointPath()
+
+ runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
+ if err != nil {
+ return nil, 0, err
+ }
+
+ // Keep the content of /dev/shm directory
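+ // (/dev/shm is bind-mounted from the host's ShmDir, so its contents are
+ // not part of the CRIU image and must be archived separately).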
+ if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+ shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
+
+ shmDirTarFile, err := os.Create(shmDirTarFileFullPath)
+ if err != nil {
+ return nil, 0, err
+ }
+ defer shmDirTarFile.Close()
+
+ input, err := archive.TarWithOptions(c.config.ShmDir, &archive.TarOptions{
+ Compression: archive.Uncompressed,
+ IncludeSourceDir: true,
+ })
+ if err != nil {
+ return nil, 0, err
+ }
+
+ if _, err = io.Copy(shmDirTarFile, input); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ // Save network.status. This is needed to restore the container with
+ // the same IP. Currently limited to one IP address in a container
+ // with one interface.
+ // FIXME: will this break something?
+ if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
+ return nil, 0, err
+ }
+
+ defer c.newContainerEvent(events.Checkpoint)
+
+ // There is a bug in CRIU: https://github.com/checkpoint-restore/criu/issues/116
+ // We have to change the symbolic link from an absolute path to a relative path.
+ if options.WithPrevious {
+ os.Remove(path.Join(c.CheckpointPath(), "parent"))
+ if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ if options.TargetFile != "" {
+ if err := c.exportCheckpoint(options); err != nil {
+ return nil, 0, err
+ }
+ } else {
+ if err := c.createCheckpointImage(ctx, options); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ logrus.Debugf("Checkpointed container %s", c.ID())
+
+ if !options.KeepRunning && !options.PreCheckPoint {
+ c.state.State = define.ContainerStateStopped
+ c.state.Checkpointed = true
+ c.state.CheckpointedTime = time.Now()
+ c.state.Restored = false
+ c.state.RestoredTime = time.Time{}
+
+ // Clean up Storage and Network
+ if err := c.cleanup(ctx); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
+ if !options.PrintStats {
+ return nil, nil
+ }
+ statsDirectory, err := os.Open(c.bundlePath())
+ if err != nil {
+ return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
+ }
+
+ dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
+ if err != nil {
+ return nil, fmt.Errorf("displaying checkpointing statistics not possible: %w", err)
+ }
+
+ return &define.CRIUCheckpointRestoreStatistics{
+ FreezingTime: dumpStatistics.GetFreezingTime(),
+ FrozenTime: dumpStatistics.GetFrozenTime(),
+ MemdumpTime: dumpStatistics.GetMemdumpTime(),
+ MemwriteTime: dumpStatistics.GetMemwriteTime(),
+ PagesScanned: dumpStatistics.GetPagesScanned(),
+ PagesWritten: dumpStatistics.GetPagesWritten(),
+ }, nil
+ }()
+ if err != nil {
+ return nil, 0, err
+ }
+
+ if !options.Keep && !options.PreCheckPoint {
+ cleanup := []string{
+ "dump.log",
+ stats.StatsDump,
+ metadata.ConfigDumpFile,
+ metadata.SpecDumpFile,
+ }
+ for _, del := range cleanup {
+ file := filepath.Join(c.bundlePath(), del)
+ if err := os.Remove(file); err != nil {
+ logrus.Debugf("Unable to remove file %s", file)
+ }
+ }
+ // The log file has been deleted; clear the path so it is no longer reported.
+ c.state.CheckpointLog = ""
+ }
+
+ c.state.FinishedTime = time.Now()
+ return criuStatistics, runtimeCheckpointDuration, c.save()
+}
+
+func (c *Container) generateContainerSpec() error {
+ // Make sure the newly created config.json exists on disk
+
+ // NewFromSpec() is deprecated according to its comment,
+ // but the recommended replacement currently causes a nil map panic.
+ //nolint:staticcheck
+ g := generate.NewFromSpec(c.config.Spec)
+
+ if err := c.saveSpec(g.Config); err != nil {
+ return fmt.Errorf("saving imported container specification for restore failed: %w", err)
+ }
+
+ return nil
+}
+
+func (c *Container) importCheckpointImage(ctx context.Context, imageID string) error {
+ img, _, err := c.Runtime().LibimageRuntime().LookupImage(imageID, nil)
+ if err != nil {
+ return err
+ }
+
+ mountPoint, err := img.Mount(ctx, nil, "")
+ defer func() {
+ if err := c.unmount(true); err != nil {
+ logrus.Errorf("Failed to unmount container: %v", err)
+ }
+ }()
+ if err != nil {
+ return err
+ }
+
+ // Import all checkpoint files except ConfigDumpFile and SpecDumpFile. We
+ // generate new container config files to enable specifying a new
+ // container name.
+ checkpoint := []string{
+ "artifacts",
+ metadata.CheckpointDirectory,
+ metadata.CheckpointVolumesDirectory,
+ metadata.DevShmCheckpointTar,
+ metadata.RootFsDiffTar,
+ metadata.DeletedFilesFile,
+ metadata.PodOptionsFile,
+ metadata.PodDumpFile,
+ }
+
+ for _, name := range checkpoint {
+ src := filepath.Join(mountPoint, name)
+ dst := filepath.Join(c.bundlePath(), name)
+ if err := archive.NewDefaultArchiver().CopyWithTar(src, dst); err != nil {
+ logrus.Debugf("Can't import '%s' from checkpoint image", name)
+ }
+ }
+
+ return c.generateContainerSpec()
+}
+
+func (c *Container) importCheckpointTar(input string) error {
+ if err := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input); err != nil {
+ return err
+ }
+
+ return c.generateContainerSpec()
+}
+
+func (c *Container) importPreCheckpoint(input string) error {
+ archiveFile, err := os.Open(input)
+ if err != nil {
+ return fmt.Errorf("failed to open pre-checkpoint archive for import: %w", err)
+ }
+
+ defer archiveFile.Close()
+
+ err = archive.Untar(archiveFile, c.bundlePath(), nil)
+ if err != nil {
+ return fmt.Errorf("unpacking of pre-checkpoint archive %s failed: %w", input, err)
+ }
+ return nil
+}
+
+func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
+ minCriuVersion := func() int {
+ if options.Pod == "" {
+ return criu.MinCriuVersion
+ }
+ return criu.PodCriuVersion
+ }()
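+ // Restoring into a pod requires a more recent CRIU than a plain
+ // container restore, hence the version switch above.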
+ if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
+ return nil, 0, err
+ }
+
+ if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
+ return nil, 0, fmt.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
+ }
+
+ if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
+ return nil, 0, fmt.Errorf("container %s is running or paused, cannot restore: %w", c.ID(), define.ErrCtrStateInvalid)
+ }
+
+ if options.ImportPrevious != "" {
+ if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ if options.TargetFile != "" {
+ if err := c.importCheckpointTar(options.TargetFile); err != nil {
+ return nil, 0, err
+ }
+ } else if options.CheckpointImageID != "" {
+ if err := c.importCheckpointImage(ctx, options.CheckpointImageID); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ // Let's try to stat() CRIU's inventory file. If it does not exist, it makes
+ // no sense to try a restore. This is a minimal check that a checkpoint exists.
+ if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
+ return nil, 0, fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err)
+ }
+
+ if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
+ return nil, 0, err
+ }
+
+ // Setting RestoreLog early in case there is a failure.
+ c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
+ c.state.CheckpointPath = c.CheckpointPath()
+
+ // Read network configuration from checkpoint
+ var netStatus map[string]types.StatusBlock
+ _, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
+ if err != nil {
+ logrus.Infof("Failed to unmarshal network status, cannot restore the same ip/mac: %v", err)
+ }
+ // If the restored container should get a new name, the IP address of
+ // the container will not be restored. This assumes that if a new name is
+ // specified, the container is restored multiple times.
+ // TODO: This implicit restoring with or without IP depending on an
+ // unrelated restore parameter (--name) does not seem like the
+ // best solution.
+ if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
+ // The file with the network.status does exist. Let's restore the
+ // container with the same networks settings as during checkpointing.
+ networkOpts, err := c.networks()
+ if err != nil {
+ return nil, 0, err
+ }
+
+ netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
+ for network, perNetOpts := range networkOpts {
+ // unset mac and ips before we start adding the ones from the status
+ perNetOpts.StaticMAC = nil
+ perNetOpts.StaticIPs = nil
+ for name, netInt := range netStatus[network].Interfaces {
+ perNetOpts.InterfaceName = name
+ if !options.IgnoreStaticMAC {
+ perNetOpts.StaticMAC = netInt.MacAddress
+ }
+ if !options.IgnoreStaticIP {
+ for _, netAddress := range netInt.Subnets {
+ perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
+ }
+ }
+ // Normally there is only one interface; only some special CNI configs produce more.
+ // For now, just use the first interface to get the IPs; this should be good enough for most cases.
+ break
+ }
+ netOpts[network] = perNetOpts
+ }
+ c.perNetworkOpts = netOpts
+ }
+
+ defer func() {
+ if retErr != nil {
+ if err := c.cleanup(ctx); err != nil {
+ logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
+ }
+ }
+ }()
+
+ if err := c.prepare(); err != nil {
+ return nil, 0, err
+ }
+
+ // Read config
+ jsonPath := filepath.Join(c.bundlePath(), "config.json")
+ logrus.Debugf("generate.NewFromFile at %v", jsonPath)
+ g, err := generate.NewFromFile(jsonPath)
+ if err != nil {
+ logrus.Debugf("generate.NewFromFile failed with %v", err)
+ return nil, 0, err
+ }
+
+ // Restoring from an import means that we are doing migration
+ if options.TargetFile != "" || options.CheckpointImageID != "" {
+ g.SetRootPath(c.state.Mountpoint)
+ }
+
+ // We want to have the same network namespace as before.
+ if err := c.addNetworkNamespace(&g); err != nil {
+ return nil, 0, err
+ }
+
+ if options.Pod != "" {
+ // Running in a Pod means that we have to change all namespace settings to
+ // the ones from the infrastructure container.
+ pod, err := c.runtime.LookupPod(options.Pod)
+ if err != nil {
+ return nil, 0, fmt.Errorf("pod %q cannot be retrieved: %w", options.Pod, err)
+ }
+
+ infraContainer, err := pod.InfraContainer()
+ if err != nil {
+ return nil, 0, fmt.Errorf("cannot retrieved infra container from pod %q: %w", options.Pod, err)
+ }
+
+ infraContainer.lock.Lock()
+ if err := infraContainer.syncContainer(); err != nil {
+ infraContainer.lock.Unlock()
+ return nil, 0, fmt.Errorf("syncing infrastructure container %s status: %w", infraContainer.ID(), err)
+ }
+ if infraContainer.state.State != define.ContainerStateRunning {
+ if err := infraContainer.initAndStart(ctx); err != nil {
+ infraContainer.lock.Unlock()
+ return nil, 0, fmt.Errorf("starting infrastructure container %s status: %w", infraContainer.ID(), err)
+ }
+ }
+ infraContainer.lock.Unlock()
+
+ if c.config.IPCNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(IPCNS)
+ if err != nil {
+ return nil, 0, fmt.Errorf("cannot retrieve IPC namespace path for Pod %q: %w", options.Pod, err)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ if c.config.NetNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(NetNS)
+ if err != nil {
+ return nil, 0, fmt.Errorf("cannot retrieve network namespace path for Pod %q: %w", options.Pod, err)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ if c.config.PIDNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(PIDNS)
+ if err != nil {
+ return nil, 0, fmt.Errorf("cannot retrieve PID namespace path for Pod %q: %w", options.Pod, err)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ if c.config.UTSNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(UTSNS)
+ if err != nil {
+ return nil, 0, fmt.Errorf("cannot retrieve UTS namespace path for Pod %q: %w", options.Pod, err)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ if c.config.CgroupNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(CgroupNS)
+ if err != nil {
+ return nil, 0, fmt.Errorf("cannot retrieve Cgroup namespace path for Pod %q: %w", options.Pod, err)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
+ return nil, 0, err
+ }
+ }
+ }
+
+ if err := c.makeBindMounts(); err != nil {
+ return nil, 0, err
+ }
+
+ if options.TargetFile != "" || options.CheckpointImageID != "" {
+ for dstPath, srcPath := range c.state.BindMounts {
+ newMount := spec.Mount{
+ Type: "bind",
+ Source: srcPath,
+ Destination: dstPath,
+ Options: []string{"bind", "private"},
+ }
+ if c.IsReadOnly() && dstPath != "/dev/shm" {
+ newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
+ }
+ if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+ newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
+ }
+ if !MountExists(g.Mounts(), dstPath) {
+ g.AddMount(newMount)
+ }
+ }
+ }
+
+ // Restore /dev/shm content
+ if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
+ shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
+ if _, err := os.Stat(shmDirTarFileFullPath); err != nil {
+ logrus.Debug("Container checkpoint doesn't contain dev/shm: ", err.Error())
+ } else {
+ shmDirTarFile, err := os.Open(shmDirTarFileFullPath)
+ if err != nil {
+ return nil, 0, err
+ }
+ defer shmDirTarFile.Close()
+
+ if err := archive.UntarUncompressed(shmDirTarFile, c.config.ShmDir, nil); err != nil {
+ return nil, 0, err
+ }
+ }
+ }
+
+ // Cleanup for a working restore.
+ if err := c.removeConmonFiles(); err != nil {
+ return nil, 0, err
+ }
+
+ // Save the OCI spec to disk
+ if err := c.saveSpec(g.Config); err != nil {
+ return nil, 0, err
+ }
+
+ // When restoring from an imported archive, allow restoring the content of volumes.
+ // Volumes are created in setupContainer()
+ if !options.IgnoreVolumes && (options.TargetFile != "" || options.CheckpointImageID != "") {
+ for _, v := range c.config.NamedVolumes {
+ volumeFilePath := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory, v.Name+".tar")
+
+ volumeFile, err := os.Open(volumeFilePath)
+ if err != nil {
+ return nil, 0, fmt.Errorf("failed to open volume file %s: %w", volumeFilePath, err)
+ }
+ defer volumeFile.Close()
+
+ volume, err := c.runtime.GetVolume(v.Name)
+ if err != nil {
+ return nil, 0, fmt.Errorf("failed to retrieve volume %s: %w", v.Name, err)
+ }
+
+ mountPoint, err := volume.MountPoint()
+ if err != nil {
+ return nil, 0, err
+ }
+ if mountPoint == "" {
+ return nil, 0, fmt.Errorf("unable to import volume %s as it is not mounted: %w", volume.Name(), err)
+ }
+ if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
+ return nil, 0, fmt.Errorf("failed to extract volume %s to %s: %w", volumeFilePath, mountPoint, err)
+ }
+ }
+ }
+
+ // Before actually restarting the container, apply the root file-system changes
+ if !options.IgnoreRootfs {
+ if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
+ return nil, 0, err
+ }
+
+ if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
+ if err != nil {
+ return nil, 0, err
+ }
+
+ criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
+ if !options.PrintStats {
+ return nil, nil
+ }
+ statsDirectory, err := os.Open(c.bundlePath())
+ if err != nil {
+ return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
+ }
+
+ restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
+ if err != nil {
+ return nil, fmt.Errorf("displaying restore statistics not possible: %w", err)
+ }
+
+ return &define.CRIUCheckpointRestoreStatistics{
+ PagesCompared: restoreStatistics.GetPagesCompared(),
+ PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
+ ForkingTime: restoreStatistics.GetForkingTime(),
+ RestoreTime: restoreStatistics.GetRestoreTime(),
+ PagesRestored: restoreStatistics.GetPagesRestored(),
+ }, nil
+ }()
+ if err != nil {
+ return nil, 0, err
+ }
+
+ logrus.Debugf("Restored container %s", c.ID())
+
+ c.state.State = define.ContainerStateRunning
+ c.state.Checkpointed = false
+ c.state.Restored = true
+ c.state.CheckpointedTime = time.Time{}
+ c.state.RestoredTime = time.Now()
+
+ if !options.Keep {
+ // Delete all checkpoint related files. At this point, in theory, all files
+ // should exist. Still ignoring errors for now as the container should be
+ // restored and running. Not erroring out just because some cleanup operation
+ // failed. Starting with the checkpoint directory
+ err = os.RemoveAll(c.CheckpointPath())
+ if err != nil {
+ logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
+ }
+ c.state.CheckpointPath = ""
+ err = os.RemoveAll(c.PreCheckPointPath())
+ if err != nil {
+ logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
+ }
+ err = os.RemoveAll(c.CheckpointVolumesPath())
+ if err != nil {
+ logrus.Debugf("Non-fatal: removal of checkpoint volumes directory (%s) failed: %v", c.CheckpointVolumesPath(), err)
+ }
+ cleanup := [...]string{
+ "restore.log",
+ "dump.log",
+ stats.StatsDump,
+ stats.StatsRestore,
+ metadata.DevShmCheckpointTar,
+ metadata.NetworkStatusFile,
+ metadata.RootFsDiffTar,
+ metadata.DeletedFilesFile,
+ }
+ for _, del := range cleanup {
+ file := filepath.Join(c.bundlePath(), del)
+ err = os.Remove(file)
+ if err != nil {
+ logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
+ }
+ }
+ c.state.CheckpointLog = ""
+ c.state.RestoreLog = ""
+ }
+
+ return criuStatistics, runtimeRestoreDuration, c.save()
+}
+
+// Retrieves a container's "root" net namespace container dependency.
+func (c *Container) getRootNetNsDepCtr() (depCtr *Container, err error) {
+ containersVisited := map[string]int{c.config.ID: 1}
+ nextCtr := c.config.NetNsCtr
+ for nextCtr != "" {
+ // Make sure we aren't in a loop
+ if _, visited := containersVisited[nextCtr]; visited {
+ return nil, errors.New("loop encountered while determining net namespace container")
+ }
+ containersVisited[nextCtr] = 1
+
+ depCtr, err = c.runtime.state.Container(nextCtr)
+ if err != nil {
+ return nil, fmt.Errorf("fetching dependency %s of container %s: %w", c.config.NetNsCtr, c.ID(), err)
+ }
+ // This should never happen without an error
+ if depCtr == nil {
+ break
+ }
+ nextCtr = depCtr.config.NetNsCtr
+ }
+
+ if depCtr == nil {
+ return nil, errors.New("unexpected error depCtr is nil without reported error from runtime state")
+ }
+ return depCtr, nil
+}
+
+// Ensure standard bind mounts are mounted into all root directories (including chroot directories)
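+// For example, registering "/etc/hosts" here also registers
+// "<chrootDir>/etc/hosts" for every configured chroot directory.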
+func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error {
+ c.state.BindMounts[mountName] = mountPath
+
+ for _, chrootDir := range c.config.ChrootDirs {
+ c.state.BindMounts[filepath.Join(chrootDir, mountName)] = mountPath
+ }
+
+ return nil
+}
+
+// Make standard bind mounts to include in the container
+func (c *Container) makeBindMounts() error {
+ if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
+ return fmt.Errorf("cannot chown run directory: %w", err)
+ }
+
+ if c.state.BindMounts == nil {
+ c.state.BindMounts = make(map[string]string)
+ }
+ netDisabled, err := c.NetworkDisabled()
+ if err != nil {
+ return err
+ }
+
+ if !netDisabled {
+ // If /etc/resolv.conf and /etc/hosts exist, delete them so we
+ // will recreate. Only do this if we aren't sharing them with
+ // another container.
+ if c.config.NetNsCtr == "" {
+ if resolvePath, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
+ if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
+ return fmt.Errorf("container %s: %w", c.ID(), err)
+ }
+ delete(c.state.BindMounts, "/etc/resolv.conf")
+ }
+ if hostsPath, ok := c.state.BindMounts["/etc/hosts"]; ok {
+ if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
+ return fmt.Errorf("container %s: %w", c.ID(), err)
+ }
+ delete(c.state.BindMounts, "/etc/hosts")
+ }
+ }
+
+ if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
+ // We share a net namespace: we want /etc/resolv.conf
+ // and /etc/hosts from the other container, unless the
+ // image's own copies are being used instead.
+ depCtr, err := c.getRootNetNsDepCtr()
+ if err != nil {
+ return fmt.Errorf("fetching network namespace dependency container for container %s: %w", c.ID(), err)
+ }
+
+ // We need that container's bind mounts
+ bindMounts, err := depCtr.BindMounts()
+ if err != nil {
+ return fmt.Errorf("fetching bind mounts from dependency %s of container %s: %w", depCtr.ID(), c.ID(), err)
+ }
+
+ // The other container may not have a resolv.conf or /etc/hosts
+ // If it doesn't, don't copy them
+ resolvPath, exists := bindMounts["/etc/resolv.conf"]
+ if !c.config.UseImageResolvConf && exists {
+ err := c.mountIntoRootDirs("/etc/resolv.conf", resolvPath)
+
+ if err != nil {
+ return fmt.Errorf("assigning mounts to container %s: %w", c.ID(), err)
+ }
+ }
+
+ // check if dependency container has an /etc/hosts file.
+ // It may not have one, so only use it if it does.
+ hostsPath, exists := bindMounts[config.DefaultHostsFile]
+ if !c.config.UseImageHosts && exists {
+ // we cannot use the dependency container lock due to possible ABBA deadlocks in cleanup()
+ lock, err := lockfile.GetLockfile(hostsPath)
+ if err != nil {
+ return fmt.Errorf("failed to lock hosts file: %w", err)
+ }
+ lock.Lock()
+
+ // add the new container to the hosts file
+ // we always use 127.0.0.1 as the IP since both share the same netns
+ err = etchosts.Add(hostsPath, getLocalhostHostEntry(c))
+ lock.Unlock()
+ if err != nil {
+ return fmt.Errorf("creating hosts file for container %s which depends on container %s: %w", c.ID(), depCtr.ID(), err)
+ }
+
+ // finally, save it in the new container
+ err = c.mountIntoRootDirs(config.DefaultHostsFile, hostsPath)
+ if err != nil {
+ return fmt.Errorf("assigning mounts to container %s: %w", c.ID(), err)
+ }
+ }
+
+ if !hasCurrentUserMapped(c) {
+ if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
+ return err
+ }
+ if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
+ return err
+ }
+ }
+ } else {
+ if !c.config.UseImageResolvConf {
+ if err := c.generateResolvConf(); err != nil {
+ return fmt.Errorf("creating resolv.conf for container %s: %w", c.ID(), err)
+ }
+ }
+
+ if !c.config.UseImageHosts {
+ if err := c.createHosts(); err != nil {
+ return fmt.Errorf("creating hosts file for container %s: %w", c.ID(), err)
+ }
+ }
+ }
+
+ if c.state.BindMounts["/etc/hosts"] != "" {
+ if err := c.relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
+ return err
+ }
+ }
+
+ if c.state.BindMounts["/etc/resolv.conf"] != "" {
+ if err := c.relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
+ return err
+ }
+ }
+ } else if !c.config.UseImageHosts && c.state.BindMounts["/etc/hosts"] == "" {
+ if err := c.createHosts(); err != nil {
+ return fmt.Errorf("creating hosts file for container %s: %w", c.ID(), err)
+ }
+ }
+
+ if c.config.ShmDir != "" {
+ // If ShmDir has a value SHM is always added when we mount the container
+ c.state.BindMounts["/dev/shm"] = c.config.ShmDir
+ }
+
+ if c.config.Passwd == nil || *c.config.Passwd {
+ newPasswd, newGroup, err := c.generatePasswdAndGroup()
+ if err != nil {
+ return fmt.Errorf("creating temporary passwd file for container %s: %w", c.ID(), err)
+ }
+ if newPasswd != "" {
+ // Make /etc/passwd
+ // If it already exists, delete so we can recreate
+ delete(c.state.BindMounts, "/etc/passwd")
+ c.state.BindMounts["/etc/passwd"] = newPasswd
+ }
+ if newGroup != "" {
+ // Make /etc/group
+ // If it already exists, delete so we can recreate
+ delete(c.state.BindMounts, "/etc/group")
+ c.state.BindMounts["/etc/group"] = newGroup
+ }
+ }
+
+ // Make /etc/localtime
+ ctrTimezone := c.Timezone()
+ if ctrTimezone != "" {
+ // validate the format of the timezone specified if it's not "local"
+ if ctrTimezone != "local" {
+ _, err = time.LoadLocation(ctrTimezone)
+ if err != nil {
+ return fmt.Errorf("finding timezone for container %s: %w", c.ID(), err)
+ }
+ }
+ if _, ok := c.state.BindMounts["/etc/localtime"]; !ok {
+ var zonePath string
+ if ctrTimezone == "local" {
+ zonePath, err = filepath.EvalSymlinks("/etc/localtime")
+ if err != nil {
+ return fmt.Errorf("finding local timezone for container %s: %w", c.ID(), err)
+ }
+ } else {
+ zone := filepath.Join("/usr/share/zoneinfo", ctrTimezone)
+ zonePath, err = filepath.EvalSymlinks(zone)
+ if err != nil {
+ return fmt.Errorf("setting timezone for container %s: %w", c.ID(), err)
+ }
+ }
+ localtimePath, err := c.copyTimezoneFile(zonePath)
+ if err != nil {
+ return fmt.Errorf("setting timezone for container %s: %w", c.ID(), err)
+ }
+ c.state.BindMounts["/etc/localtime"] = localtimePath
+ }
+ }
+
+ _, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"]
+ if !hasRunContainerenv {
+ // check in the spec mounts
+ for _, m := range c.config.Spec.Mounts {
+ if m.Destination == "/run/.containerenv" || m.Destination == "/run" {
+ hasRunContainerenv = true
+ break
+ }
+ }
+ }
+
+ // Make .containerenv if it does not exist
+ if !hasRunContainerenv {
+ containerenv := c.runtime.graphRootMountedFlag(c.config.Spec.Mounts)
+ isRootless := 0
+ if rootless.IsRootless() {
+ isRootless = 1
+ }
+ imageID, imageName := c.Image()
+
+ if c.Privileged() {
+ // Populate the .containerenv with container information
+ containerenv = fmt.Sprintf(`engine="podman-%s"
+name=%q
+id=%q
+image=%q
+imageid=%q
+rootless=%d
+%s`, version.Version.String(), c.Name(), c.ID(), imageName, imageID, isRootless, containerenv)
+ }
+ containerenvPath, err := c.writeStringToRundir(".containerenv", containerenv)
+ if err != nil {
+ return fmt.Errorf("creating containerenv file for container %s: %w", c.ID(), err)
+ }
+ c.state.BindMounts["/run/.containerenv"] = containerenvPath
+ }
+
+ // Add Subscription Mounts
+ subscriptionMounts := subscriptions.MountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
+ for _, mount := range subscriptionMounts {
+ if _, ok := c.state.BindMounts[mount.Destination]; !ok {
+ c.state.BindMounts[mount.Destination] = mount.Source
+ }
+ }
+
+ // Secrets are mounted by getting the secret data from the secrets manager,
+ // copying the data into the container's static dir,
+ // then mounting the copied dir into /run/secrets.
+ // The secrets mounting must come after the subscription mounts, since
+ // subscription mounts create the /run/secrets dir in the container, which we mount into as well.
+ if len(c.Secrets()) > 0 {
+ // create /run/secrets if the subscription mounts did not create it
+ if err := c.createSecretMountDir(); err != nil {
+ return fmt.Errorf("creating secrets mount: %w", err)
+ }
+ for _, secret := range c.Secrets() {
+ secretFileName := secret.Name
+ base := "/run/secrets"
+ if secret.Target != "" {
+ secretFileName = secret.Target
+ // If an absolute path is given for the target, remove the base.
+ if filepath.IsAbs(secretFileName) {
+ base = ""
+ }
+ }
+ src := filepath.Join(c.config.SecretsPath, secret.Name)
+ dest := filepath.Join(base, secretFileName)
+ c.state.BindMounts[dest] = src
+ }
+ }
+
+ return c.makePlatformBindMounts()
+}
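+
+// Every entry accumulated in c.state.BindMounts above is later translated
+// into an OCI bind mount when the runtime spec is generated; a minimal
+// sketch of that translation (not the verbatim implementation, mirroring the
+// bind-mount loop in generateSpec and the per-platform bindOptions var):
+//
+//	for dst, src := range c.state.BindMounts {
+//		g.AddMount(spec.Mount{
+//			Type:        "bind",
+//			Source:      src,
+//			Destination: dst,
+//			Options:     bindOptions,
+//		})
+//	}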
+
+// generateResolvConf generates the container's resolv.conf
+func (c *Container) generateResolvConf() error {
+ var (
+ networkNameServers []string
+ networkSearchDomains []string
+ )
+
+ netStatus := c.getNetworkStatus()
+ for _, status := range netStatus {
+ if status.DNSServerIPs != nil {
+ for _, nsIP := range status.DNSServerIPs {
+ networkNameServers = append(networkNameServers, nsIP.String())
+ }
+ logrus.Debugf("Adding nameserver(s) from network status of '%q'", status.DNSServerIPs)
+ }
+ if status.DNSSearchDomains != nil {
+ networkSearchDomains = append(networkSearchDomains, status.DNSSearchDomains...)
+ logrus.Debugf("Adding search domain(s) from network status of '%q'", status.DNSSearchDomains)
+ }
+ }
+
+ ipv6, err := c.checkForIPv6(netStatus)
+ if err != nil {
+ return err
+ }
+
+ nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers)+len(c.config.DNSServer))
+ nameservers = append(nameservers, c.runtime.config.Containers.DNSServers...)
+ for _, ip := range c.config.DNSServer {
+ nameservers = append(nameservers, ip.String())
+ }
+ // User-provided DNS servers trump all others; then come the network's DNS servers; then the host's resolv.conf
+ var search []string
+ keepHostServers := false
+ if len(nameservers) == 0 {
+ keepHostServers = true
+ // first add the nameservers from the network status
+ nameservers = networkNameServers
+ // when we add network DNS servers we also have to add their search domains
+ search = networkSearchDomains
+ // slirp4netns has a built-in DNS forwarder.
+ nameservers = c.addSlirp4netnsDNS(nameservers)
+ }
+
+ if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches) > 0 {
+ customSearch := make([]string, 0, len(c.config.DNSSearch)+len(c.runtime.config.Containers.DNSSearches))
+ customSearch = append(customSearch, c.runtime.config.Containers.DNSSearches...)
+ customSearch = append(customSearch, c.config.DNSSearch...)
+ search = customSearch
+ }
+
+ options := make([]string, 0, len(c.config.DNSOption)+len(c.runtime.config.Containers.DNSOptions))
+ options = append(options, c.runtime.config.Containers.DNSOptions...)
+ options = append(options, c.config.DNSOption...)
+
+ destPath := filepath.Join(c.state.RunDir, "resolv.conf")
+
+ var namespaces []spec.LinuxNamespace
+ if c.config.Spec.Linux != nil {
+ namespaces = c.config.Spec.Linux.Namespaces
+ }
+
+ if err := resolvconf.New(&resolvconf.Params{
+ IPv6Enabled: ipv6,
+ KeepHostServers: keepHostServers,
+ Nameservers: nameservers,
+ Namespaces: namespaces,
+ Options: options,
+ Path: destPath,
+ Searches: search,
+ }); err != nil {
+ return fmt.Errorf("building resolv.conf for container %s: %w", c.ID(), err)
+ }
+
+ return c.bindMountRootFile(destPath, resolvconf.DefaultResolvConf)
+}
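+
+// For example (all values hypothetical), a container started with
+// --dns 1.1.1.1 --dns-search example.com would end up with a resolv.conf
+// along the lines of:
+//
+//	search example.com
+//	nameserver 1.1.1.1
+//
+// whereas a container with no DNS flags inherits the network's servers plus
+// the host's (KeepHostServers above).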
+
+// Check if a container uses IPv6.
+func (c *Container) checkForIPv6(netStatus map[string]types.StatusBlock) (bool, error) {
+ for _, status := range netStatus {
+ for _, netInt := range status.Interfaces {
+ for _, netAddress := range netInt.Subnets {
+ // Note: using only To16() does not work since it also returns a valid IP for IPv4 addresses
+ if netAddress.IPNet.IP.To4() == nil && netAddress.IPNet.IP.To16() != nil {
+ return true, nil
+ }
+ }
+ }
+ }
+
+ return c.isSlirp4netnsIPv6()
+}
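+
+// The To4()/To16() pair is needed because net.IP.To16() also succeeds for
+// IPv4 addresses; e.g. net.ParseIP("192.0.2.1").To4() != nil, whereas
+// net.ParseIP("2001:db8::1").To4() == nil and To16() != nil, which is the
+// combination treated as IPv6 above.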
+
+// Add a new nameserver to the container's resolv.conf, ensuring that it is the
+// first nameserver present.
+// Usable only with running or created containers.
+func (c *Container) addNameserver(ips []string) error {
+ // Take no action if container is not running.
+ if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
+ return nil
+ }
+
+ // Do we have a resolv.conf at all?
+ path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
+ if !ok {
+ return nil
+ }
+
+ if err := resolvconf.Add(path, ips); err != nil {
+ return fmt.Errorf("adding new nameserver to container %s resolv.conf: %w", c.ID(), err)
+ }
+
+ return nil
+}
+
+// Remove an entry from the existing resolv.conf of the container.
+// Usable only with running or created containers.
+func (c *Container) removeNameserver(ips []string) error {
+ // Take no action if container is not running.
+ if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
+ return nil
+ }
+
+ // Do we have a resolv.conf at all?
+ path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
+ if !ok {
+ return nil
+ }
+
+ if err := resolvconf.Remove(path, ips); err != nil {
+ return fmt.Errorf("removing nameservers from container %s resolv.conf: %w", c.ID(), err)
+ }
+
+ return nil
+}
+
+func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
+ return etchosts.HostEntries{{IP: "127.0.0.1", Names: []string{c.Hostname(), c.config.Name}}}
+}
+
+// getHostsEntries returns the container IP host entries for the correct netmode
+func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
+ var entries etchosts.HostEntries
+ names := []string{c.Hostname(), c.config.Name}
+ switch {
+ case c.config.NetMode.IsBridge():
+ entries = etchosts.GetNetworkHostEntries(c.state.NetworkStatus, names...)
+ case c.config.NetMode.IsSlirp4netns():
+ ip, err := GetSlirp4netnsIP(c.slirp4netnsSubnet)
+ if err != nil {
+ return nil, err
+ }
+ entries = etchosts.HostEntries{{IP: ip.String(), Names: names}}
+ default:
+ if c.hasNetNone() {
+ entries = etchosts.HostEntries{{IP: "127.0.0.1", Names: names}}
+ }
+ }
+ return entries, nil
+}
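+
+// As an illustration (names and addresses hypothetical), a bridge-mode
+// container named "web" with hostname "web1" and IP 10.88.0.5 yields an
+// /etc/hosts entry along the lines of:
+//
+//	10.88.0.5	web1 web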
+
+func (c *Container) createHosts() error {
+ var containerIPsEntries etchosts.HostEntries
+ var err error
+ // If we configure the netns after container creation we should not add
+ // the hosts entries here, since we have no information about the actual
+ // IPs; instead we will add them in c.completeNetworkSetup().
+ if !c.config.PostConfigureNetNS {
+ containerIPsEntries, err = c.getHostsEntries()
+ if err != nil {
+ return fmt.Errorf("failed to get container ip host entries: %w", err)
+ }
+ }
+ baseHostFile, err := etchosts.GetBaseHostFile(c.runtime.config.Containers.BaseHostsFile, c.state.Mountpoint)
+ if err != nil {
+ return err
+ }
+
+ targetFile := filepath.Join(c.state.RunDir, "hosts")
+ err = etchosts.New(&etchosts.Params{
+ BaseFile: baseHostFile,
+ ExtraHosts: c.config.HostAdd,
+ ContainerIPs: containerIPsEntries,
+ HostContainersInternalIP: etchosts.GetHostContainersInternalIP(c.runtime.config, c.state.NetworkStatus, c.runtime.network),
+ TargetFile: targetFile,
+ })
+ if err != nil {
+ return err
+ }
+
+ return c.bindMountRootFile(targetFile, config.DefaultHostsFile)
+}
+
+// bindMountRootFile will chown and relabel the source file to make it usable in the container.
+// It will also add the path to the container bind mount map.
+// source is the path on the host, dest is the path in the container.
+func (c *Container) bindMountRootFile(source, dest string) error {
+ if err := os.Chown(source, c.RootUID(), c.RootGID()); err != nil {
+ return err
+ }
+ if err := label.Relabel(source, c.MountLabel(), false); err != nil {
+ return err
+ }
+
+ return c.mountIntoRootDirs(dest, source)
+}
+
+// generateGroupEntry generates an entry or entries into /etc/group as
+// required by container configuration.
+// Generally speaking, we will make an entry under two circumstances:
+// 1. The container is started as a specific user:group, and that group is
+// numeric and does not already exist in /etc/group.
+// 2. It is requested that Libpod add the group that launched Podman to
+// /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if
+// the group in question already exists in /etc/group).
+//
+// Returns group entry (as a string that can be appended to /etc/group) and any
+// error that occurred.
+func (c *Container) generateGroupEntry() (string, error) {
+ groupString := ""
+
+ // Things we *can't* handle: adding the user we added in
+ // generatePasswdEntry to any *existing* groups.
+ addedGID := 0
+ if c.config.AddCurrentUserPasswdEntry {
+ entry, gid, err := c.generateCurrentUserGroupEntry()
+ if err != nil {
+ return "", err
+ }
+ groupString += entry
+ addedGID = gid
+ }
+ if c.config.User != "" {
+ entry, err := c.generateUserGroupEntry(addedGID)
+ if err != nil {
+ return "", err
+ }
+ groupString += entry
+ }
+
+ return groupString, nil
+}
+
+// Make an entry in /etc/group for the group of the user running podman iff we
+// are rootless.
+func (c *Container) generateCurrentUserGroupEntry() (string, int, error) {
+ gid := rootless.GetRootlessGID()
+ if gid == 0 {
+ return "", 0, nil
+ }
+
+ g, err := user.LookupGroupId(strconv.Itoa(gid))
+ if err != nil {
+ return "", 0, fmt.Errorf("failed to get current group: %w", err)
+ }
+
+ // Look up group name to see if it exists in the image.
+ _, err = lookup.GetGroup(c.state.Mountpoint, g.Name)
+ if err != runcuser.ErrNoGroupEntries {
+ return "", 0, err
+ }
+
+ // Look up GID to see if it exists in the image.
+ _, err = lookup.GetGroup(c.state.Mountpoint, g.Gid)
+ if err != runcuser.ErrNoGroupEntries {
+ return "", 0, err
+ }
+
+ // We need to get the username of the rootless user so we can add it to
+ // the group.
+ username := ""
+ uid := rootless.GetRootlessUID()
+ if uid != 0 {
+ u, err := user.LookupId(strconv.Itoa(uid))
+ if err != nil {
+ return "", 0, fmt.Errorf("failed to get current user to make group entry: %w", err)
+ }
+ username = u.Username
+ }
+
+ // Make the entry.
+ return fmt.Sprintf("%s:x:%s:%s\n", g.Name, g.Gid, username), gid, nil
+}
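+
+// For example (hypothetical IDs), a rootless user "alice" whose primary
+// group is "alice" with GID 1000 yields the entry "alice:x:1000:alice".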
+
+// Make an entry in /etc/group for the group the container was specified to run
+// as.
+func (c *Container) generateUserGroupEntry(addedGID int) (string, error) {
+ if c.config.User == "" {
+ return "", nil
+ }
+
+ splitUser := strings.SplitN(c.config.User, ":", 2)
+ group := splitUser[0]
+ if len(splitUser) > 1 {
+ group = splitUser[1]
+ }
+
+ gid, err := strconv.ParseUint(group, 10, 32)
+ if err != nil {
+ return "", nil //nolint: nilerr
+ }
+
+ if addedGID != 0 && addedGID == int(gid) {
+ return "", nil
+ }
+
+ // Check if the group already exists
+ _, err = lookup.GetGroup(c.state.Mountpoint, group)
+ if err != runcuser.ErrNoGroupEntries {
+ return "", err
+ }
+
+ return fmt.Sprintf("%d:x:%d:%s\n", gid, gid, splitUser[0]), nil
+}
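+
+// For example (hypothetical IDs), --user 1000:2000 with no matching group in
+// the image produces the entry "2000:x:2000:1000".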
+
+// generatePasswdEntry generates an entry or entries into /etc/passwd as
+// required by container configuration.
+// Generally speaking, we will make an entry under three circumstances:
+// 1. The container is started as a specific user who is not in /etc/passwd.
+// This only triggers if the user is given as a *numeric* ID.
+// 2. It is requested that Libpod add the user that launched Podman to
+// /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if
+// the user in question already exists in /etc/passwd or the UID to be added
+// is 0).
+// 3. The user specified additional host user accounts to add to the /etc/passwd file.
+//
+// Returns password entry (as a string that can be appended to /etc/passwd) and
+// any error that occurred.
+func (c *Container) generatePasswdEntry() (string, error) {
+ passwdString := ""
+
+ addedUID := 0
+ for _, userid := range c.config.HostUsers {
+ // Look up User on host
+ u, err := util.LookupUser(userid)
+ if err != nil {
+ return "", err
+ }
+ entry, err := c.userPasswdEntry(u)
+ if err != nil {
+ return "", err
+ }
+ passwdString += entry
+ }
+ if c.config.AddCurrentUserPasswdEntry {
+ entry, uid, _, err := c.generateCurrentUserPasswdEntry()
+ if err != nil {
+ return "", err
+ }
+ passwdString += entry
+ addedUID = uid
+ }
+ if c.config.User != "" {
+ entry, err := c.generateUserPasswdEntry(addedUID)
+ if err != nil {
+ return "", err
+ }
+ passwdString += entry
+ }
+
+ return passwdString, nil
+}
+
+// generateCurrentUserPasswdEntry generates an /etc/passwd entry for the user
+// running the container engine.
+// Returns a passwd entry for the user, and the UID and GID of the added entry.
+func (c *Container) generateCurrentUserPasswdEntry() (string, int, int, error) {
+ uid := rootless.GetRootlessUID()
+ if uid == 0 {
+ return "", 0, 0, nil
+ }
+
+ u, err := user.LookupId(strconv.Itoa(uid))
+ if err != nil {
+ return "", 0, 0, fmt.Errorf("failed to get current user: %w", err)
+ }
+ pwd, err := c.userPasswdEntry(u)
+ if err != nil {
+ return "", 0, 0, err
+ }
+
+ return pwd, uid, rootless.GetRootlessGID(), nil
+}
+
+func (c *Container) userPasswdEntry(u *user.User) (string, error) {
+ // Look up the user to see if it exists in the container image.
+ _, err := lookup.GetUser(c.state.Mountpoint, u.Username)
+ if err != runcuser.ErrNoPasswdEntries {
+ return "", err
+ }
+
+ // Look up the UID to see if it exists in the container image.
+ _, err = lookup.GetUser(c.state.Mountpoint, u.Uid)
+ if err != runcuser.ErrNoPasswdEntries {
+ return "", err
+ }
+
+ // If the user's actual home directory exists or was mounted in, use
+ // that.
+ homeDir := c.WorkingDir()
+ hDir := u.HomeDir
+ for hDir != "/" {
+ if MountExists(c.config.Spec.Mounts, hDir) {
+ homeDir = u.HomeDir
+ break
+ }
+ hDir = filepath.Dir(hDir)
+ }
+ if homeDir != u.HomeDir {
+ for _, hDir := range c.UserVolumes() {
+ if hDir == u.HomeDir {
+ homeDir = u.HomeDir
+ break
+ }
+ }
+ }
+ // Set HOME environment if not already set
+ hasHomeSet := false
+ for _, s := range c.config.Spec.Process.Env {
+ if strings.HasPrefix(s, "HOME=") {
+ hasHomeSet = true
+ break
+ }
+ }
+ if !hasHomeSet {
+ c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", homeDir))
+ }
+ if c.config.PasswdEntry != "" {
+ return c.passwdEntry(u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
+ }
+
+ return fmt.Sprintf("%s:*:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
+}
+
+// generateUserPasswdEntry generates an /etc/passwd entry for the container user
+// to run in the container.
+// The UID and GID of the added entry will also be returned.
+// Accepts one argument, that being any UID that has already been added to the
+// passwd file by other functions; if it matches the UID we were given, we don't
+// need to do anything.
+func (c *Container) generateUserPasswdEntry(addedUID int) (string, error) {
+ var (
+ groupspec string
+ gid int
+ )
+ if c.config.User == "" {
+ return "", nil
+ }
+ splitSpec := strings.SplitN(c.config.User, ":", 2)
+ userspec := splitSpec[0]
+ if len(splitSpec) > 1 {
+ groupspec = splitSpec[1]
+ }
+ // If the user is non-numeric, don't generate a passwd entry
+ uid, err := strconv.ParseUint(userspec, 10, 32)
+ if err != nil {
+ return "", nil //nolint: nilerr
+ }
+
+ if addedUID != 0 && int(uid) == addedUID {
+ return "", nil
+ }
+
+ // Look up the user to see if it exists in the container image
+ _, err = lookup.GetUser(c.state.Mountpoint, userspec)
+ if err != runcuser.ErrNoPasswdEntries {
+ return "", err
+ }
+
+ if groupspec != "" {
+ ugid, err := strconv.ParseUint(groupspec, 10, 32)
+ if err == nil {
+ gid = int(ugid)
+ } else {
+ group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
+ if err != nil {
+ return "", fmt.Errorf("unable to get gid %s from group file: %w", groupspec, err)
+ }
+ gid = group.Gid
+ }
+ }
+
+ if c.config.PasswdEntry != "" {
+ entry := c.passwdEntry(fmt.Sprintf("%d", uid), fmt.Sprintf("%d", uid), fmt.Sprintf("%d", gid), "container user", c.WorkingDir())
+ return entry, nil
+ }
+
+ return fmt.Sprintf("%d:*:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), nil
+}
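+
+// For example (hypothetical IDs), --user 1000:1000 on an image without that
+// UID produces "1000:*:1000:1000:container user:/:/bin/sh", assuming a
+// working directory of "/".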
+
+func (c *Container) passwdEntry(username string, uid, gid, name, homeDir string) string {
+ s := c.config.PasswdEntry
+ s = strings.ReplaceAll(s, "$USERNAME", username)
+ s = strings.ReplaceAll(s, "$UID", uid)
+ s = strings.ReplaceAll(s, "$GID", gid)
+ s = strings.ReplaceAll(s, "$NAME", name)
+ s = strings.ReplaceAll(s, "$HOME", homeDir)
+ return s + "\n"
+}
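+
+// The template above backs the --passwd-entry flag; for example
+// (hypothetical values) --passwd-entry '$USERNAME:*:$UID:$GID::$HOME:/bin/bash'
+// renders as "alice:*:1000:1000::/home/alice:/bin/bash" for a matching user.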
+
+// generatePasswdAndGroup generates container-specific passwd and group files
+// iff c.config.User is a number, we are configured to make a passwd entry for
+// the current user, or the user specified HostUsers.
+// Returns the path to the file to mount at /etc/passwd, the path to the file
+// to mount at /etc/group, and any error that occurred. If no passwd/group
+// file was required, the empty string will be returned for that path (this
+// may occur even if no error happened).
+// This may modify the mounted container's /etc/passwd and /etc/group instead of
+// making copies to bind-mount in, so we don't break useradd (it wants to make a
+// copy of /etc/passwd and rename the copy to /etc/passwd, which is impossible
+// with a bind mount). This is done in cases where the container is *not*
+// read-only. In this case, the function will return nothing ("", "", nil).
+func (c *Container) generatePasswdAndGroup() (string, string, error) {
+ if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" &&
+ len(c.config.HostUsers) == 0 {
+ return "", "", nil
+ }
+
+ needPasswd := true
+ needGroup := true
+
+ // First, check if there's a mount at /etc/passwd or /etc/group; we
+ // don't want to interfere with user mounts.
+ if MountExists(c.config.Spec.Mounts, "/etc/passwd") {
+ needPasswd = false
+ }
+ if MountExists(c.config.Spec.Mounts, "/etc/group") {
+ needGroup = false
+ }
+
+ // Next, check if we already made the files. If we did, we don't need
+ // to do anything more.
+ if needPasswd {
+ passwdPath := filepath.Join(c.config.StaticDir, "passwd")
+ if _, err := os.Stat(passwdPath); err == nil {
+ needPasswd = false
+ }
+ }
+ if needGroup {
+ groupPath := filepath.Join(c.config.StaticDir, "group")
+ if _, err := os.Stat(groupPath); err == nil {
+ needGroup = false
+ }
+ }
+
+ // If we don't need a /etc/passwd or /etc/group at this point we can
+ // just return.
+ if !needPasswd && !needGroup {
+ return "", "", nil
+ }
+
+ passwdPath := ""
+ groupPath := ""
+
+ ro := c.IsReadOnly()
+
+ if needPasswd {
+ passwdEntry, err := c.generatePasswdEntry()
+ if err != nil {
+ return "", "", err
+ }
+
+ needsWrite := passwdEntry != ""
+ switch {
+ case ro && needsWrite:
+ logrus.Debugf("Making /etc/passwd for container %s", c.ID())
+ originPasswdFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
+ if err != nil {
+ return "", "", fmt.Errorf("creating path to container %s /etc/passwd: %w", c.ID(), err)
+ }
+ orig, err := ioutil.ReadFile(originPasswdFile)
+ if err != nil && !os.IsNotExist(err) {
+ return "", "", err
+ }
+ passwdFile, err := c.writeStringToStaticDir("passwd", string(orig)+passwdEntry)
+ if err != nil {
+ return "", "", fmt.Errorf("failed to create temporary passwd file: %w", err)
+ }
+ if err := os.Chmod(passwdFile, 0644); err != nil {
+ return "", "", err
+ }
+ passwdPath = passwdFile
+ case !ro && needsWrite:
+ logrus.Debugf("Modifying container %s /etc/passwd", c.ID())
+ containerPasswd, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
+ if err != nil {
+ return "", "", fmt.Errorf("looking up location of container %s /etc/passwd: %w", c.ID(), err)
+ }
+
+ f, err := os.OpenFile(containerPasswd, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
+ if err != nil {
+ return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
+ }
+ defer f.Close()
+
+ if _, err := f.WriteString(passwdEntry); err != nil {
+ return "", "", fmt.Errorf("unable to append to container %s /etc/passwd: %w", c.ID(), err)
+ }
+ default:
+ logrus.Debugf("Not modifying container %s /etc/passwd", c.ID())
+ }
+ }
+ if needGroup {
+ groupEntry, err := c.generateGroupEntry()
+ if err != nil {
+ return "", "", err
+ }
+
+ needsWrite := groupEntry != ""
+ switch {
+ case ro && needsWrite:
+ logrus.Debugf("Making /etc/group for container %s", c.ID())
+ originGroupFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
+ if err != nil {
+ return "", "", fmt.Errorf("creating path to container %s /etc/group: %w", c.ID(), err)
+ }
+ orig, err := ioutil.ReadFile(originGroupFile)
+ if err != nil && !os.IsNotExist(err) {
+ return "", "", err
+ }
+ groupFile, err := c.writeStringToStaticDir("group", string(orig)+groupEntry)
+ if err != nil {
+ return "", "", fmt.Errorf("failed to create temporary group file: %w", err)
+ }
+ if err := os.Chmod(groupFile, 0644); err != nil {
+ return "", "", err
+ }
+ groupPath = groupFile
+ case !ro && needsWrite:
+ logrus.Debugf("Modifying container %s /etc/group", c.ID())
+ containerGroup, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
+ if err != nil {
+ return "", "", fmt.Errorf("looking up location of container %s /etc/group: %w", c.ID(), err)
+ }
+
+ f, err := os.OpenFile(containerGroup, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
+ if err != nil {
+ return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
+ }
+ defer f.Close()
+
+ if _, err := f.WriteString(groupEntry); err != nil {
+ return "", "", fmt.Errorf("unable to append to container %s /etc/group: %w", c.ID(), err)
+ }
+ default:
+ logrus.Debugf("Not modifying container %s /etc/group", c.ID())
+ }
+ }
+
+ return passwdPath, groupPath, nil
+}
+
+func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
+ localtimeCopy := filepath.Join(c.state.RunDir, "localtime")
+ file, err := os.Stat(zonePath)
+ if err != nil {
+ return "", err
+ }
+ if file.IsDir() {
+ return "", errors.New("invalid timezone: is a directory")
+ }
+ src, err := os.Open(zonePath)
+ if err != nil {
+ return "", err
+ }
+ defer src.Close()
+ dest, err := os.Create(localtimeCopy)
+ if err != nil {
+ return "", err
+ }
+ defer dest.Close()
+ _, err = io.Copy(dest, src)
+ if err != nil {
+ return "", err
+ }
+ if err := c.relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
+ return "", err
+ }
+ if err := dest.Chown(c.RootUID(), c.RootGID()); err != nil {
+ return "", err
+ }
+ return localtimeCopy, nil
+}
+
+func (c *Container) cleanupOverlayMounts() error {
+ return overlay.CleanupContent(c.config.StaticDir)
+}
+
+// Creates an empty dir to mount secrets into and adds it to the bind mounts, if it does not already exist
+func (c *Container) createSecretMountDir() error {
+ src := filepath.Join(c.state.RunDir, "/run/secrets")
+ _, err := os.Stat(src)
+ if os.IsNotExist(err) {
+ oldUmask := umask.Set(0)
+ defer umask.Set(oldUmask)
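+ // With the umask cleared, MkdirAll below creates the directory with
+ // exactly 0755 even under a restrictive caller umask (e.g. 077).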
+
+ if err := os.MkdirAll(src, 0755); err != nil {
+ return err
+ }
+ if err := label.Relabel(src, c.config.MountLabel, false); err != nil {
+ return err
+ }
+ if err := os.Chown(src, c.RootUID(), c.RootGID()); err != nil {
+ return err
+ }
+ c.state.BindMounts["/run/secrets"] = src
+ return nil
+ }
+
+ return err
+}
+
+// Fix ownership and permissions of the specified volume if necessary.
+func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
+ vol, err := c.runtime.state.Volume(v.Name)
+ if err != nil {
+ return fmt.Errorf("retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
+ }
+
+ vol.lock.Lock()
+ defer vol.lock.Unlock()
+
+ // The volume may need a copy-up. Check the state.
+ if err := vol.update(); err != nil {
+ return err
+ }
+
+ // Volumes owned by a volume driver are not chowned; we don't want to
+ // mess with a mount not managed by us.
+ if vol.state.NeedsChown && !vol.UsesVolumeDriver() {
+ vol.state.NeedsChown = false
+
+ uid := int(c.config.Spec.Process.User.UID)
+ gid := int(c.config.Spec.Process.User.GID)
+
+ if c.config.IDMappings.UIDMap != nil {
+ p := idtools.IDPair{
+ UID: uid,
+ GID: gid,
+ }
+ mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap)
+ newPair, err := mappings.ToHost(p)
+ if err != nil {
+ return fmt.Errorf("mapping user %d:%d: %w", uid, gid, err)
+ }
+ uid = newPair.UID
+ gid = newPair.GID
+ }
+
+ vol.state.UIDChowned = uid
+ vol.state.GIDChowned = gid
+
+ if err := vol.save(); err != nil {
+ return err
+ }
+
+ mountPoint, err := vol.MountPoint()
+ if err != nil {
+ return err
+ }
+
+ if err := os.Lchown(mountPoint, uid, gid); err != nil {
+ return err
+ }
+
+ // Make sure the new volume matches the permissions of the target directory.
+ // https://github.com/containers/podman/issues/10188
+ st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest))
+ if err == nil {
+ if stat, ok := st.Sys().(*syscall.Stat_t); ok {
+ if err := os.Lchown(mountPoint, int(stat.Uid), int(stat.Gid)); err != nil {
+ return err
+ }
+ }
+ if err := os.Chmod(mountPoint, st.Mode()); err != nil {
+ return err
+ }
+ if err := setVolumeAtime(mountPoint, st); err != nil {
+ return err
+ }
+ } else if !os.IsNotExist(err) {
+ return err
+ }
+ }
+ return nil
+}
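+
+// Under a rootless ID mapping such as {ContainerID: 0, HostID: 100000,
+// Size: 65536} (values hypothetical), mappings.ToHost translates container
+// uid/gid 0:0 into host 100000:100000 before the chown above, so files land
+// with the correct owner outside the user namespace.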
+
+func (c *Container) relabel(src, mountLabel string, recurse bool) error {
+ if !selinux.GetEnabled() || mountLabel == "" {
+ return nil
+ }
+ // only relabel on initial creation of container
+ if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
+ label, err := label.FileLabel(src)
+ if err != nil {
+ return err
+ }
+ // If the file already carries the mount label, nothing to do; if the
+ // labels differ the file might be on a tmpfs and must be relabeled again
+ if label == mountLabel {
+ return nil
+ }
+ }
+ return label.Relabel(src, mountLabel, recurse)
+}
+
+func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid int) error {
+ // only chown on initial creation of container
+ if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
+ st, err := os.Stat(src)
+ if err != nil {
+ return err
+ }
+
+ // If the ownership already matches, there is nothing to do
+ if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
+ return nil
+ }
+ }
+ return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
+}
diff --git a/libpod/container_internal_freebsd.go b/libpod/container_internal_freebsd.go
new file mode 100644
index 000000000..67f87a98d
--- /dev/null
+++ b/libpod/container_internal_freebsd.go
@@ -0,0 +1,274 @@
+//go:build freebsd
+// +build freebsd
+
+package libpod
+
+import (
+ "fmt"
+ "os"
+ "strings"
+ "sync"
+ "syscall"
+ "time"
+
+ "github.com/containers/common/libnetwork/types"
+ "github.com/containers/podman/v4/pkg/rootless"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/runtime-tools/generate"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+var (
+ bindOptions = []string{}
+)
+
+func (c *Container) mountSHM(shmOptions string) error {
+ return nil
+}
+
+func (c *Container) unmountSHM(path string) error {
+ return nil
+}
+
+// prepare mounts the container and sets up other required resources like net
+// namespaces
+func (c *Container) prepare() error {
+ var (
+ wg sync.WaitGroup
+ ctrNS *jailNetNS
+ networkStatus map[string]types.StatusBlock
+ createNetNSErr, mountStorageErr error
+ mountPoint string
+ tmpStateLock sync.Mutex
+ )
+
+ wg.Add(2)
+
+ go func() {
+ defer wg.Done()
+ // Set up network namespace if not already set up
+ noNetNS := c.state.NetNS == nil
+ if c.config.CreateNetNS && noNetNS && !c.config.PostConfigureNetNS {
+ ctrNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
+ if createNetNSErr != nil {
+ return
+ }
+
+ tmpStateLock.Lock()
+ defer tmpStateLock.Unlock()
+
+ // Assign NetNS attributes to container
+ c.state.NetNS = ctrNS
+ c.state.NetworkStatus = networkStatus
+ }
+ }()
+ // Mount storage if not mounted
+ go func() {
+ defer wg.Done()
+ mountPoint, mountStorageErr = c.mountStorage()
+
+ if mountStorageErr != nil {
+ return
+ }
+
+ tmpStateLock.Lock()
+ defer tmpStateLock.Unlock()
+
+ // Finish up mountStorage
+ c.state.Mounted = true
+ c.state.Mountpoint = mountPoint
+
+ logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
+ }()
+
+ wg.Wait()
+
+ var createErr error
+ if createNetNSErr != nil {
+ createErr = createNetNSErr
+ }
+ if mountStorageErr != nil {
+ if createErr != nil {
+ logrus.Errorf("Preparing container %s: %v", c.ID(), createErr)
+ }
+ createErr = mountStorageErr
+ }
+
+ if createErr != nil {
+ return createErr
+ }
+
+ // Save changes to container state
+ if err := c.save(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// cleanupNetwork unmounts and cleans up the container's network
+func (c *Container) cleanupNetwork() error {
+ if c.config.NetNsCtr != "" {
+ return nil
+ }
+ netDisabled, err := c.NetworkDisabled()
+ if err != nil {
+ return err
+ }
+ if netDisabled {
+ return nil
+ }
+
+ // Stop the container's network namespace (if it has one)
+ if err := c.runtime.teardownNetNS(c); err != nil {
+ logrus.Errorf("Unable to cleanup network for container %s: %q", c.ID(), err)
+ }
+
+ if c.valid {
+ return c.save()
+ }
+
+ return nil
+}
+
+// reloadNetwork reloads the network for the given container, recreating
+// firewall rules.
+func (c *Container) reloadNetwork() error {
+ result, err := c.runtime.reloadContainerNetwork(c)
+ if err != nil {
+ return err
+ }
+
+ c.state.NetworkStatus = result
+
+ return c.save()
+}
+
+// Join an existing container's network jail
+func (c *Container) addNetworkContainer(g *generate.Generator, ctr string) error {
+ nsCtr, err := c.runtime.state.Container(ctr)
+ if err != nil {
+ return fmt.Errorf("retrieving dependency %s of container %s from state: %w", ctr, c.ID(), err)
+ }
+ if err := c.runtime.state.UpdateContainer(nsCtr); err != nil {
+ return err
+ }
+ g.AddAnnotation("org.freebsd.parentJail", nsCtr.state.NetNS.Name)
+ return nil
+}
+
+func isRootlessCgroupSet(cgroup string) bool {
+ return false
+}
+
+func (c *Container) expectPodCgroup() (bool, error) {
+ return false, nil
+}
+
+func (c *Container) getOCICgroupPath() (string, error) {
+ return "", nil
+}
+
+func openDirectory(path string) (fd int, err error) {
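+ // O_PATH is only available on FreeBSD 13 and later and is presumably not
+ // exported by golang.org/x/sys/unix for FreeBSD, hence the local constant
+ // (value taken from <fcntl.h>).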
+ const O_PATH = 0x00400000
+ return unix.Open(path, unix.O_RDONLY|O_PATH, 0)
+}
+
+func (c *Container) addNetworkNamespace(g *generate.Generator) error {
+ if c.config.CreateNetNS {
+ g.AddAnnotation("org.freebsd.parentJail", c.state.NetNS.Name)
+ }
+ return nil
+}
+
+func (c *Container) addSystemdMounts(g *generate.Generator) error {
+ return nil
+}
+
+func (c *Container) addSharedNamespaces(g *generate.Generator) error {
+ if c.config.NetNsCtr != "" {
+ if err := c.addNetworkContainer(g, c.config.NetNsCtr); err != nil {
+ return err
+ }
+ }
+
+ availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
+ if err != nil {
+ if os.IsNotExist(err) {
+ // The kernel-provided files only exist if user namespaces are supported
+ logrus.Debugf("User or group ID mappings not available: %s", err)
+ } else {
+ return err
+ }
+ } else {
+ g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
+ g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
+ }
+
+ // Hostname handling:
+ // If we have a UTS namespace, set Hostname in the OCI spec.
+ // Set the HOSTNAME environment variable unless explicitly overridden by
+ // the user (already present in OCI spec). If we don't have a UTS ns,
+ // set it to the host's hostname instead.
+ hostname := c.Hostname()
+ foundUTS := false
+
+ // TODO: make this optional, needs progress on adding FreeBSD section to the spec
+ foundUTS = true
+ g.SetHostname(hostname)
+
+ if !foundUTS {
+ tmpHostname, err := os.Hostname()
+ if err != nil {
+ return err
+ }
+ hostname = tmpHostname
+ }
+ needEnv := true
+ for _, checkEnv := range g.Config.Process.Env {
+ if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
+ needEnv = false
+ break
+ }
+ }
+ if needEnv {
+ g.AddProcessEnv("HOSTNAME", hostname)
+ }
+ return nil
+}
+
+func (c *Container) addRootPropagation(g *generate.Generator, mounts []spec.Mount) error {
+ return nil
+}
+
+func (c *Container) setProcessLabel(g *generate.Generator) {
+}
+
+func (c *Container) setMountLabel(g *generate.Generator) {
+}
+
+func (c *Container) setCgroupsPath(g *generate.Generator) error {
+ return nil
+}
+
+func (c *Container) addSlirp4netnsDNS(nameservers []string) []string {
+ return nameservers
+}
+
+func (c *Container) isSlirp4netnsIPv6() (bool, error) {
+ return false, nil
+}
+
+// check for net=none
+func (c *Container) hasNetNone() bool {
+ return c.state.NetNS == nil
+}
+
+func setVolumeAtime(mountPoint string, st os.FileInfo) error {
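+ // FreeBSD's syscall.Stat_t exposes the access time as Atimespec (Linux
+ // calls it Atim), which is why this helper is platform-specific.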
+ stat := st.Sys().(*syscall.Stat_t)
+ atime := time.Unix(int64(stat.Atimespec.Sec), int64(stat.Atimespec.Nsec)) //nolint: unconvert
+ if err := os.Chtimes(mountPoint, atime, st.ModTime()); err != nil {
+ return err
+ }
+ return nil
+}
+
+func (c *Container) makePlatformBindMounts() error {
+ return nil
+}
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index a131ab367..ef8649776 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -4,64 +4,34 @@
package libpod
import (
- "context"
"errors"
"fmt"
- "io"
- "io/ioutil"
- "math"
"os"
- "os/user"
"path"
"path/filepath"
- "strconv"
"strings"
"sync"
"syscall"
"time"
- metadata "github.com/checkpoint-restore/checkpointctl/lib"
- "github.com/checkpoint-restore/go-criu/v5/stats"
- cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/containernetworking/plugins/pkg/ns"
- "github.com/containers/buildah"
- "github.com/containers/buildah/pkg/chrootuser"
- "github.com/containers/buildah/pkg/overlay"
- butil "github.com/containers/buildah/util"
- "github.com/containers/common/libnetwork/etchosts"
- "github.com/containers/common/libnetwork/resolvconf"
"github.com/containers/common/libnetwork/types"
- "github.com/containers/common/pkg/apparmor"
"github.com/containers/common/pkg/cgroups"
- "github.com/containers/common/pkg/chown"
"github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/subscriptions"
- "github.com/containers/common/pkg/umask"
- cutil "github.com/containers/common/pkg/util"
- is "github.com/containers/image/v5/storage"
"github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/libpod/events"
- "github.com/containers/podman/v4/pkg/annotations"
- "github.com/containers/podman/v4/pkg/checkpoint/crutils"
- "github.com/containers/podman/v4/pkg/criu"
- "github.com/containers/podman/v4/pkg/lookup"
"github.com/containers/podman/v4/pkg/rootless"
- "github.com/containers/podman/v4/pkg/util"
"github.com/containers/podman/v4/utils"
- "github.com/containers/podman/v4/version"
- "github.com/containers/storage/pkg/archive"
- "github.com/containers/storage/pkg/idtools"
- "github.com/containers/storage/pkg/lockfile"
- securejoin "github.com/cyphar/filepath-securejoin"
- runcuser "github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
- "github.com/opencontainers/selinux/go-selinux"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
+var (
+ bindOptions = []string{"bind", "rprivate"}
+)
+
func (c *Container) mountSHM(shmOptions string) error {
if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
label.FormatMountLabel(shmOptions, c.config.MountLabel)); err != nil {
@@ -73,7 +43,7 @@ func (c *Container) mountSHM(shmOptions string) error {
func (c *Container) unmountSHM(mount string) error {
if err := unix.Unmount(mount, 0); err != nil {
if err != syscall.EINVAL && err != syscall.ENOENT {
- return fmt.Errorf("error unmounting container %s SHM mount %s: %w", c.ID(), mount, err)
+ return fmt.Errorf("unmounting container %s SHM mount %s: %w", c.ID(), mount, err)
}
// If it's just an EINVAL or ENOENT, debug logs only
logrus.Debugf("Container %s failed to unmount %s : %v", c.ID(), mount, err)
@@ -152,7 +122,7 @@ func (c *Container) prepare() error {
// createErr is guaranteed non-nil, so print
// unconditionally
logrus.Errorf("Preparing container %s: %v", c.ID(), createErr)
- createErr = fmt.Errorf("error unmounting storage for container %s after network create failure: %w", c.ID(), err)
+ createErr = fmt.Errorf("unmounting storage for container %s after network create failure: %w", c.ID(), err)
}
}
@@ -161,7 +131,7 @@ func (c *Container) prepare() error {
if createErr != nil {
if err := c.cleanupNetwork(); err != nil {
logrus.Errorf("Preparing container %s: %v", c.ID(), createErr)
- createErr = fmt.Errorf("error cleaning up container %s network after setup failure: %w", c.ID(), err)
+ createErr = fmt.Errorf("cleaning up container %s network after setup failure: %w", c.ID(), err)
}
}
@@ -177,118 +147,6 @@ func (c *Container) prepare() error {
return nil
}
-// isWorkDirSymlink returns true if resolved workdir is symlink or a chain of symlinks,
-// and final resolved target is present either on volume, mount or inside of container
-// otherwise it returns false. Following function is meant for internal use only and
-// can change at any point of time.
-func (c *Container) isWorkDirSymlink(resolvedPath string) bool {
- // We cannot create workdir since explicit --workdir is
- // set in config but workdir could also be a symlink.
- // If it's a symlink, check if the resolved target is present in the container.
- // If so, that's a valid use case: return nil.
-
- maxSymLinks := 0
- for {
- // Linux only supports a chain of 40 links.
- // Reference: https://github.com/torvalds/linux/blob/master/include/linux/namei.h#L13
- if maxSymLinks > 40 {
- break
- }
- resolvedSymlink, err := os.Readlink(resolvedPath)
- if err != nil {
- // End sym-link resolution loop.
- break
- }
- if resolvedSymlink != "" {
- _, resolvedSymlinkWorkdir, err := c.resolvePath(c.state.Mountpoint, resolvedSymlink)
- if isPathOnVolume(c, resolvedSymlinkWorkdir) || isPathOnBindMount(c, resolvedSymlinkWorkdir) {
- // Resolved symlink exists on external volume or mount
- return true
- }
- if err != nil {
- // Could not resolve path so end sym-link resolution loop.
- break
- }
- if resolvedSymlinkWorkdir != "" {
- resolvedPath = resolvedSymlinkWorkdir
- _, err := os.Stat(resolvedSymlinkWorkdir)
- if err == nil {
- // Symlink resolved successfully and resolved path exists on container,
- // this is a valid use-case so return nil.
- logrus.Debugf("Workdir is a symlink with target to %q and resolved symlink exists on container", resolvedSymlink)
- return true
- }
- }
- }
- maxSymLinks++
- }
- return false
-}
-
-// resolveWorkDir resolves the container's workdir and, depending on the
-// configuration, will create it, or error out if it does not exist.
-// Note that the container must be mounted before.
-func (c *Container) resolveWorkDir() error {
- workdir := c.WorkingDir()
-
- // If the specified workdir is a subdir of a volume or mount,
- // we don't need to do anything. The runtime is taking care of
- // that.
- if isPathOnVolume(c, workdir) || isPathOnBindMount(c, workdir) {
- logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
- return nil
- }
-
- _, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir)
- if err != nil {
- return err
- }
- logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir)
-
- st, err := os.Stat(resolvedWorkdir)
- if err == nil {
- if !st.IsDir() {
- return fmt.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
- }
- return nil
- }
- if !c.config.CreateWorkingDir {
- // No need to create it (e.g., `--workdir=/foo`), so let's make sure
- // the path exists on the container.
- if err != nil {
- if os.IsNotExist(err) {
- // If resolved Workdir path gets marked as a valid symlink,
- // return nil cause this is valid use-case.
- if c.isWorkDirSymlink(resolvedWorkdir) {
- return nil
- }
- return fmt.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
- }
- // This might be a serious error (e.g., permission), so
- // we need to return the full error.
- return fmt.Errorf("error detecting workdir %q on container %s: %w", workdir, c.ID(), err)
- }
- return nil
- }
- if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil {
- if os.IsExist(err) {
- return nil
- }
- return fmt.Errorf("error creating container %s workdir: %w", c.ID(), err)
- }
-
- // Ensure container entrypoint is created (if required).
- uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
- if err != nil {
- return fmt.Errorf("error looking up %s inside of the container %s: %w", c.User(), c.ID(), err)
- }
- if err := os.Chown(resolvedWorkdir, int(uid), int(gid)); err != nil {
- return fmt.Errorf("error chowning container %s workdir to container root: %w", c.ID(), err)
- }
-
- return nil
-}
-
// cleanupNetwork unmounts and cleans up the container's network
func (c *Container) cleanupNetwork() error {
if c.config.NetNsCtr != "" {
@@ -335,671 +193,6 @@ func (c *Container) reloadNetwork() error {
return c.save()
}
-func (c *Container) getUserOverrides() *lookup.Overrides {
- var hasPasswdFile, hasGroupFile bool
- overrides := lookup.Overrides{}
- for _, m := range c.config.Spec.Mounts {
- if m.Destination == "/etc/passwd" {
- overrides.ContainerEtcPasswdPath = m.Source
- hasPasswdFile = true
- }
- if m.Destination == "/etc/group" {
- overrides.ContainerEtcGroupPath = m.Source
- hasGroupFile = true
- }
- if m.Destination == "/etc" {
- if !hasPasswdFile {
- overrides.ContainerEtcPasswdPath = filepath.Join(m.Source, "passwd")
- }
- if !hasGroupFile {
- overrides.ContainerEtcGroupPath = filepath.Join(m.Source, "group")
- }
- }
- }
- if path, ok := c.state.BindMounts["/etc/passwd"]; ok {
- overrides.ContainerEtcPasswdPath = path
- }
- return &overrides
-}
-
-func lookupHostUser(name string) (*runcuser.ExecUser, error) {
- var execUser runcuser.ExecUser
- // Look up User on host
- u, err := util.LookupUser(name)
- if err != nil {
- return &execUser, err
- }
- uid, err := strconv.ParseUint(u.Uid, 8, 32)
- if err != nil {
- return &execUser, err
- }
-
- gid, err := strconv.ParseUint(u.Gid, 8, 32)
- if err != nil {
- return &execUser, err
- }
- execUser.Uid = int(uid)
- execUser.Gid = int(gid)
- execUser.Home = u.HomeDir
- return &execUser, nil
-}
-
-// Internal only function which returns upper and work dir from
-// overlay options.
-func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
- upperDir := ""
- workDir := ""
- for _, o := range options {
- if strings.HasPrefix(o, "upperdir") {
- splitOpt := strings.SplitN(o, "=", 2)
- if len(splitOpt) > 1 {
- upperDir = splitOpt[1]
- if upperDir == "" {
- return "", "", errors.New("cannot accept empty value for upperdir")
- }
- }
- }
- if strings.HasPrefix(o, "workdir") {
- splitOpt := strings.SplitN(o, "=", 2)
- if len(splitOpt) > 1 {
- workDir = splitOpt[1]
- if workDir == "" {
- return "", "", errors.New("cannot accept empty value for workdir")
- }
- }
- }
- }
- if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") {
- return "", "", errors.New("must specify both upperdir and workdir")
- }
- return upperDir, workDir, nil
-}
-
-// Generate spec for a container
-// Accepts a map of the container's dependencies
-func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
- overrides := c.getUserOverrides()
- execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
- if err != nil {
- if cutil.StringInSlice(c.config.User, c.config.HostUsers) {
- execUser, err = lookupHostUser(c.config.User)
- }
- if err != nil {
- return nil, err
- }
- }
-
- // NewFromSpec() is deprecated according to its comment
- // however the recommended replace just causes a nil map panic
- //nolint:staticcheck
- g := generate.NewFromSpec(c.config.Spec)
-
- // If the flag to mount all devices is set for a privileged container, add
- // all the devices from the host's machine into the container
- if c.config.MountAllDevices {
- if err := util.AddPrivilegedDevices(&g); err != nil {
- return nil, err
- }
- }
-
- // If network namespace was requested, add it now
- if c.config.CreateNetNS {
- if c.config.PostConfigureNetNS {
- if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil {
- return nil, err
- }
- } else {
- if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil {
- return nil, err
- }
- }
- }
-
- // Apply AppArmor checks and load the default profile if needed.
- if len(c.config.Spec.Process.ApparmorProfile) > 0 {
- updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
- if err != nil {
- return nil, err
- }
- g.SetProcessApparmorProfile(updatedProfile)
- }
-
- if err := c.makeBindMounts(); err != nil {
- return nil, err
- }
-
- if err := c.mountNotifySocket(g); err != nil {
- return nil, err
- }
-
- // Get host UID and GID based on the container process UID and GID.
- hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
- if err != nil {
- return nil, err
- }
-
- // Add named volumes
- for _, namedVol := range c.config.NamedVolumes {
- volume, err := c.runtime.GetVolume(namedVol.Name)
- if err != nil {
- return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
- }
- mountPoint, err := volume.MountPoint()
- if err != nil {
- return nil, err
- }
-
- overlayFlag := false
- upperDir := ""
- workDir := ""
- for _, o := range namedVol.Options {
- if o == "O" {
- overlayFlag = true
- upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
- if err != nil {
- return nil, err
- }
- }
- }
-
- if overlayFlag {
- var overlayMount spec.Mount
- var overlayOpts *overlay.Options
- contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
- if err != nil {
- return nil, err
- }
-
- overlayOpts = &overlay.Options{RootUID: c.RootUID(),
- RootGID: c.RootGID(),
- UpperDirOptionFragment: upperDir,
- WorkDirOptionFragment: workDir,
- GraphOpts: c.runtime.store.GraphOptions(),
- }
-
- overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
- if err != nil {
- return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
- }
-
- for _, o := range namedVol.Options {
- if o == "U" {
- if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
-
- if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
- }
- }
- g.AddMount(overlayMount)
- } else {
- volMount := spec.Mount{
- Type: "bind",
- Source: mountPoint,
- Destination: namedVol.Dest,
- Options: namedVol.Options,
- }
- g.AddMount(volMount)
- }
- }
-
- // Check if the spec file mounts contain the options z, Z or U.
- // If they have z or Z, relabel the source directory and then remove the option.
- // If they have U, chown the source directory and them remove the option.
- for i := range g.Config.Mounts {
- m := &g.Config.Mounts[i]
- var options []string
- for _, o := range m.Options {
- switch o {
- case "U":
- if m.Type == "tmpfs" {
- options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
- } else {
- // only chown on initial creation of container
- if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
- }
- case "z":
- fallthrough
- case "Z":
- if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
- return nil, err
- }
-
- default:
- options = append(options, o)
- }
- }
- m.Options = options
- }
-
- g.SetProcessSelinuxLabel(c.ProcessLabel())
- g.SetLinuxMountLabel(c.MountLabel())
-
- // Add bind mounts to container
- for dstPath, srcPath := range c.state.BindMounts {
- newMount := spec.Mount{
- Type: "bind",
- Source: srcPath,
- Destination: dstPath,
- Options: []string{"bind", "rprivate"},
- }
- if c.IsReadOnly() && dstPath != "/dev/shm" {
- newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
- }
- if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
- newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
- }
- if !MountExists(g.Mounts(), dstPath) {
- g.AddMount(newMount)
- } else {
- logrus.Infof("User mount overriding libpod mount at %q", dstPath)
- }
- }
-
- // Add overlay volumes
- for _, overlayVol := range c.config.OverlayVolumes {
- upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
- if err != nil {
- return nil, err
- }
- contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
- if err != nil {
- return nil, err
- }
- overlayOpts := &overlay.Options{RootUID: c.RootUID(),
- RootGID: c.RootGID(),
- UpperDirOptionFragment: upperDir,
- WorkDirOptionFragment: workDir,
- GraphOpts: c.runtime.store.GraphOptions(),
- }
-
- overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
- if err != nil {
- return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
- }
-
- // Check overlay volume options
- for _, o := range overlayVol.Options {
- if o == "U" {
- if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
-
- if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
- return nil, err
- }
- }
- }
-
- g.AddMount(overlayMount)
- }
-
- // Add image volumes as overlay mounts
- for _, volume := range c.config.ImageVolumes {
- // Mount the specified image.
- img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
- if err != nil {
- return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
- }
- mountPoint, err := img.Mount(ctx, nil, "")
- if err != nil {
- return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
- }
-
- contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
- if err != nil {
- return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
- }
-
- var overlayMount spec.Mount
- if volume.ReadWrite {
- overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
- } else {
- overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
- }
- if err != nil {
- return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
- }
- g.AddMount(overlayMount)
- }
-
- hasHomeSet := false
- for _, s := range c.config.Spec.Process.Env {
- if strings.HasPrefix(s, "HOME=") {
- hasHomeSet = true
- break
- }
- }
- if !hasHomeSet && execUser.Home != "" {
- c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
- }
-
- if c.config.User != "" {
- // User and Group must go together
- g.SetProcessUID(uint32(execUser.Uid))
- g.SetProcessGID(uint32(execUser.Gid))
- }
-
- if c.config.Umask != "" {
- decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
- if err != nil {
- return nil, fmt.Errorf("invalid Umask Value: %w", err)
- }
- umask := uint32(decVal)
- g.Config.Process.User.Umask = &umask
- }
-
- // Add addition groups if c.config.GroupAdd is not empty
- if len(c.config.Groups) > 0 {
- gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
- if err != nil {
- return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err)
- }
- for _, gid := range gids {
- g.AddProcessAdditionalGid(gid)
- }
- }
-
- if c.Systemd() {
- if err := c.setupSystemd(g.Mounts(), g); err != nil {
- return nil, fmt.Errorf("error adding systemd-specific mounts: %w", err)
- }
- }
-
- // Look up and add groups the user belongs to, if a group wasn't directly specified
- if !strings.Contains(c.config.User, ":") {
- // the gidMappings that are present inside the container user namespace
- var gidMappings []idtools.IDMap
-
- switch {
- case len(c.config.IDMappings.GIDMap) > 0:
- gidMappings = c.config.IDMappings.GIDMap
- case rootless.IsRootless():
- // Check whether the current user namespace has enough gids available.
- availableGids, err := rootless.GetAvailableGids()
- if err != nil {
- return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
- }
- gidMappings = []idtools.IDMap{{
- ContainerID: 0,
- HostID: 0,
- Size: int(availableGids),
- }}
- default:
- gidMappings = []idtools.IDMap{{
- ContainerID: 0,
- HostID: 0,
- Size: math.MaxInt32,
- }}
- }
- for _, gid := range execUser.Sgids {
- isGIDAvailable := false
- for _, m := range gidMappings {
- if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
- isGIDAvailable = true
- break
- }
- }
- if isGIDAvailable {
- g.AddProcessAdditionalGid(uint32(gid))
- } else {
- logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
- }
- }
- }
-
- // Add shared namespaces from other containers
- if c.config.IPCNsCtr != "" {
- if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.MountNsCtr != "" {
- if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.NetNsCtr != "" {
- if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.PIDNsCtr != "" {
- if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.UserNsCtr != "" {
- if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
- return nil, err
- }
- if len(g.Config.Linux.UIDMappings) == 0 {
- // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping
- g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
- g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
- }
- }
-
- availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
- if err != nil {
- if os.IsNotExist(err) {
- // The kernel-provided files only exist if user namespaces are supported
- logrus.Debugf("User or group ID mappings not available: %s", err)
- } else {
- return nil, err
- }
- } else {
- g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
- g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
- }
-
- // Hostname handling:
- // If we have a UTS namespace, set Hostname in the OCI spec.
- // Set the HOSTNAME environment variable unless explicitly overridden by
- // the user (already present in OCI spec). If we don't have a UTS ns,
- // set it to the host's hostname instead.
- hostname := c.Hostname()
- foundUTS := false
-
- for _, i := range c.config.Spec.Linux.Namespaces {
- if i.Type == spec.UTSNamespace && i.Path == "" {
- foundUTS = true
- g.SetHostname(hostname)
- break
- }
- }
- if !foundUTS {
- tmpHostname, err := os.Hostname()
- if err != nil {
- return nil, err
- }
- hostname = tmpHostname
- }
- needEnv := true
- for _, checkEnv := range g.Config.Process.Env {
- if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
- needEnv = false
- break
- }
- }
- if needEnv {
- g.AddProcessEnv("HOSTNAME", hostname)
- }
-
- if c.config.UTSNsCtr != "" {
- if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
- return nil, err
- }
- }
- if c.config.CgroupNsCtr != "" {
- if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
- return nil, err
- }
- }
-
- if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs {
- if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
- return nil, err
- }
- g.ClearLinuxUIDMappings()
- for _, uidmap := range c.config.IDMappings.UIDMap {
- g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
- }
- g.ClearLinuxGIDMappings()
- for _, gidmap := range c.config.IDMappings.GIDMap {
- g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
- }
- }
-
- g.SetRootPath(c.state.Mountpoint)
- g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
- g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
-
- if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
- g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
- }
-
- cgroupPath, err := c.getOCICgroupPath()
- if err != nil {
- return nil, err
- }
-
- g.SetLinuxCgroupsPath(cgroupPath)
-
- // Warning: CDI may alter g.Config in place.
- if len(c.config.CDIDevices) > 0 {
- registry := cdi.GetRegistry()
- if errs := registry.GetErrors(); len(errs) > 0 {
- logrus.Debugf("The following errors were triggered when creating the CDI registry: %v", errs)
- }
- _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
- if err != nil {
- return nil, fmt.Errorf("error setting up CDI devices: %w", err)
- }
- }
-
- // Mounts need to be sorted so paths will not cover other paths
- mounts := sortMounts(g.Mounts())
- g.ClearMounts()
-
- // Determine property of RootPropagation based on volume properties. If
- // a volume is shared, then keep root propagation shared. This should
- // work for slave and private volumes too.
- //
- // For slave volumes, it can be either [r]shared/[r]slave.
- //
- // For private volumes any root propagation value should work.
- rootPropagation := ""
- for _, m := range mounts {
- // We need to remove all symlinks from tmpfs mounts.
- // Runc and other runtimes may choke on them.
- // Easy solution: use securejoin to do a scoped evaluation of
- // the links, then trim off the mount prefix.
- if m.Type == "tmpfs" {
- finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
- if err != nil {
- return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err)
- }
- trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
- m.Destination = trimmedPath
- }
- g.AddMount(m)
- for _, opt := range m.Options {
- switch opt {
- case MountShared, MountRShared:
- if rootPropagation != MountShared && rootPropagation != MountRShared {
- rootPropagation = MountShared
- }
- case MountSlave, MountRSlave:
- if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
- rootPropagation = MountRSlave
- }
- }
- }
- }
-
- if rootPropagation != "" {
- logrus.Debugf("Set root propagation to %q", rootPropagation)
- if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
- return nil, err
- }
- }
-
- // Warning: precreate hooks may alter g.Config in place.
- if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
- return nil, fmt.Errorf("error setting up OCI Hooks: %w", err)
- }
- if len(c.config.EnvSecrets) > 0 {
- manager, err := c.runtime.SecretsManager()
- if err != nil {
- return nil, err
- }
- for name, secr := range c.config.EnvSecrets {
- _, data, err := manager.LookupSecretData(secr.Name)
- if err != nil {
- return nil, err
- }
- g.AddProcessEnv(name, string(data))
- }
- }
-
- // Pass down the LISTEN_* environment (see #10443).
- for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
- if val, ok := os.LookupEnv(key); ok {
- // Force the PID to `1` since we cannot rely on (all
- // versions of) all runtimes to do it for us.
- if key == "LISTEN_PID" {
- val = "1"
- }
- g.AddProcessEnv(key, val)
- }
- }
-
- return g.Config, nil
-}
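For context on the LISTEN_* forwarding above: a standalone sketch (not part of this patch) of the consuming side of the systemd socket-activation protocol, which is why LISTEN_PID is pinned to "1" — the activated fds only count if LISTEN_PID matches the process's own PID, and the container payload runs as PID 1 in its namespace.

    package main

    import (
    	"fmt"
    	"os"
    	"strconv"
    )

    // listenFds returns the number of sockets passed via socket activation.
    // Per the protocol, fds 3 .. 3+n-1 are the activated sockets, and they
    // are only valid if LISTEN_PID matches our own PID.
    func listenFds() int {
    	pid, err := strconv.Atoi(os.Getenv("LISTEN_PID"))
    	if err != nil || pid != os.Getpid() {
    		return 0
    	}
    	n, err := strconv.Atoi(os.Getenv("LISTEN_FDS"))
    	if err != nil {
    		return 0
    	}
    	return n
    }

    func main() {
    	fmt.Println("activated fds:", listenFds())
    }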
-
-// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
-// and if the sdnotify mode is set to container. It also sets c.notifySocket
-// to avoid redundantly looking up the env variable.
-func (c *Container) mountNotifySocket(g generate.Generator) error {
- notify, ok := os.LookupEnv("NOTIFY_SOCKET")
- if !ok {
- return nil
- }
- c.notifySocket = notify
-
- if c.config.SdNotifyMode != define.SdNotifyModeContainer {
- return nil
- }
-
- notifyDir := filepath.Join(c.bundlePath(), "notify")
- logrus.Debugf("Checking notify %q dir", notifyDir)
- if err := os.MkdirAll(notifyDir, 0755); err != nil {
- if !os.IsExist(err) {
- return fmt.Errorf("unable to create notify %q dir: %w", notifyDir, err)
- }
- }
- if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil {
- return fmt.Errorf("relabel failed %q: %w", notifyDir, err)
- }
- logrus.Debugf("Add bindmount notify %q dir", notifyDir)
- if _, ok := c.state.BindMounts["/run/notify"]; !ok {
- c.state.BindMounts["/run/notify"] = notifyDir
- }
-
- // Set the container's notify socket to the proxy socket created by conmon
- g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
-
- return nil
-}
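A minimal sketch of what a service inside the container does with the socket proxied above, assuming it speaks the plain sd_notify datagram protocol (illustrative, not part of this patch):

    package main

    import (
    	"net"
    	"os"
    )

    // notifyReady sends READY=1 to the socket named by NOTIFY_SOCKET,
    // which mountNotifySocket above points at /run/notify/notify.sock.
    func notifyReady() error {
    	addr := os.Getenv("NOTIFY_SOCKET")
    	if addr == "" {
    		return nil // sdnotify not in use
    	}
    	conn, err := net.Dial("unixgram", addr)
    	if err != nil {
    		return err
    	}
    	defer conn.Close()
    	_, err = conn.Write([]byte("READY=1"))
    	return err
    }

    func main() {
    	if err := notifyReady(); err != nil {
    		os.Exit(1)
    	}
    }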
-
// systemd expects to have /run, /run/lock and /tmp on tmpfs
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
@@ -1074,9 +267,15 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
g.AddMount(systemdMnt)
} else {
mountOptions := []string{"bind", "rprivate"}
+ skipMount := false
var statfs unix.Statfs_t
if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ // If the mount is missing on the host, we cannot bind mount it so
+ // just skip it.
+ skipMount = true
+ }
mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
} else {
if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
@@ -1092,15 +291,16 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
mountOptions = append(mountOptions, "ro")
}
}
-
- systemdMnt := spec.Mount{
- Destination: "/sys/fs/cgroup/systemd",
- Type: "bind",
- Source: "/sys/fs/cgroup/systemd",
- Options: mountOptions,
+ if !skipMount {
+ systemdMnt := spec.Mount{
+ Destination: "/sys/fs/cgroup/systemd",
+ Type: "bind",
+ Source: "/sys/fs/cgroup/systemd",
+ Options: mountOptions,
+ }
+ g.AddMount(systemdMnt)
+ g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
}
- g.AddMount(systemdMnt)
- g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
}
return nil
@@ -1110,7 +310,7 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr string, specNS spec.LinuxNamespaceType) error {
nsCtr, err := c.runtime.state.Container(ctr)
if err != nil {
- return fmt.Errorf("error retrieving dependency %s of container %s from state: %w", ctr, c.ID(), err)
+ return fmt.Errorf("retrieving dependency %s of container %s from state: %w", ctr, c.ID(), err)
}
if specNS == spec.UTSNamespace {
@@ -1132,1287 +332,289 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
return nil
}
-func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) error {
- // Get information about host environment
- hostInfo, err := c.Runtime().hostInfo()
- if err != nil {
- return fmt.Errorf("getting host info: %v", err)
- }
-
- criuVersion, err := criu.GetCriuVersion()
- if err != nil {
- return fmt.Errorf("getting criu version: %v", err)
- }
-
- rootfsImageID, rootfsImageName := c.Image()
-
- // Add image annotations with information about the container and the host.
- // This information is useful to check compatibility before restoring the checkpoint
-
- checkpointImageAnnotations := map[string]string{
- define.CheckpointAnnotationName: c.config.Name,
- define.CheckpointAnnotationRawImageName: c.config.RawImageName,
- define.CheckpointAnnotationRootfsImageID: rootfsImageID,
- define.CheckpointAnnotationRootfsImageName: rootfsImageName,
- define.CheckpointAnnotationPodmanVersion: version.Version.String(),
- define.CheckpointAnnotationCriuVersion: strconv.Itoa(criuVersion),
- define.CheckpointAnnotationRuntimeName: hostInfo.OCIRuntime.Name,
- define.CheckpointAnnotationRuntimeVersion: hostInfo.OCIRuntime.Version,
- define.CheckpointAnnotationConmonVersion: hostInfo.Conmon.Version,
- define.CheckpointAnnotationHostArch: hostInfo.Arch,
- define.CheckpointAnnotationHostKernel: hostInfo.Kernel,
- define.CheckpointAnnotationCgroupVersion: hostInfo.CgroupsVersion,
- define.CheckpointAnnotationDistributionVersion: hostInfo.Distribution.Version,
- define.CheckpointAnnotationDistributionName: hostInfo.Distribution.Distribution,
- }
-
- for key, value := range checkpointImageAnnotations {
- importBuilder.SetAnnotation(key, value)
- }
-
- return nil
-}
-
-func (c *Container) resolveCheckpointImageName(options *ContainerCheckpointOptions) error {
- if options.CreateImage == "" {
- return nil
- }
-
- // Resolve image name
- resolvedImageName, err := c.runtime.LibimageRuntime().ResolveName(options.CreateImage)
- if err != nil {
- return err
- }
-
- options.CreateImage = resolvedImageName
- return nil
+func isRootlessCgroupSet(cgroup string) bool {
+ // Old versions of podman set the CgroupParent to CgroupfsDefaultCgroupParent by
+ // default. To avoid breaking those versions, check whether the cgroup parent is
+ // set to the default and, if so, keep the old behavior. This should not be a real
+ // problem because the default CgroupParent is usually owned by root, so rootless
+ // users cannot access it.
+ // This check might be lifted in a future version of Podman.
+ // Check whether the cgroup or its parent is set to the default value (the parent check covers pods).
+ return cgroup != CgroupfsDefaultCgroupParent && filepath.Dir(cgroup) != CgroupfsDefaultCgroupParent
}
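Illustrative behavior of this check, as a standalone sketch (the "/libpod_parent" constant is an assumption standing in for libpod's CgroupfsDefaultCgroupParent):

    package main

    import (
    	"fmt"
    	"path/filepath"
    )

    const defaultParent = "/libpod_parent" // assumed default cgroup parent

    func isRootlessCgroupSet(cgroup string) bool {
    	return cgroup != defaultParent && filepath.Dir(cgroup) != defaultParent
    }

    func main() {
    	for _, cg := range []string{
    		"/libpod_parent",         // old default -> false
    		"/libpod_parent/pod-abc", // pod under the default -> false
    		"/user.slice/my-cgroup",  // user-specified parent -> true
    	} {
    		fmt.Printf("%-25s %v\n", cg, isRootlessCgroupSet(cg))
    	}
    }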
-func (c *Container) createCheckpointImage(ctx context.Context, options ContainerCheckpointOptions) error {
- if options.CreateImage == "" {
- return nil
- }
- logrus.Debugf("Create checkpoint image %s", options.CreateImage)
-
- // Create storage reference
- imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, options.CreateImage)
- if err != nil {
- return errors.New("failed to parse image name")
- }
-
- // Build an image scratch
- builderOptions := buildah.BuilderOptions{
- FromImage: "scratch",
- }
- importBuilder, err := buildah.NewBuilder(ctx, c.runtime.store, builderOptions)
- if err != nil {
- return err
- }
- // Clean up buildah working container
- defer func() {
- if err := importBuilder.Delete(); err != nil {
- logrus.Errorf("Image builder delete failed: %v", err)
- }
- }()
-
- if err := c.prepareCheckpointExport(); err != nil {
- return err
- }
-
- // Export checkpoint into temporary tar file
- tmpDir, err := ioutil.TempDir("", "checkpoint_image_")
+func (c *Container) expectPodCgroup() (bool, error) {
+ unified, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
- return err
- }
- defer os.RemoveAll(tmpDir)
-
- options.TargetFile = path.Join(tmpDir, "checkpoint.tar")
-
- if err := c.exportCheckpoint(options); err != nil {
- return err
- }
-
- // Copy checkpoint from temporary tar file in the image
- addAndCopyOptions := buildah.AddAndCopyOptions{}
- if err := importBuilder.Add("", true, addAndCopyOptions, options.TargetFile); err != nil {
- return err
- }
-
- if err := c.addCheckpointImageMetadata(importBuilder); err != nil {
- return err
+ return false, err
}
-
- commitOptions := buildah.CommitOptions{
- Squash: true,
- SystemContext: c.runtime.imageContext,
+ cgroupManager := c.CgroupManager()
+ switch {
+ case c.config.NoCgroups:
+ return false, nil
+ case cgroupManager == config.SystemdCgroupsManager:
+ return !rootless.IsRootless() || unified, nil
+ case cgroupManager == config.CgroupfsCgroupsManager:
+ return !rootless.IsRootless(), nil
+ default:
+ return false, fmt.Errorf("invalid cgroup mode %s requested for pods: %w", cgroupManager, define.ErrInvalidArg)
}
+}
- // Create checkpoint image
- id, _, _, err := importBuilder.Commit(ctx, imageRef, commitOptions)
+// Get cgroup path in a format suitable for the OCI spec
+func (c *Container) getOCICgroupPath() (string, error) {
+ unified, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
- return err
+ return "", err
}
- logrus.Debugf("Created checkpoint image: %s", id)
- return nil
-}
-
-func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
- if len(c.Dependencies()) == 1 {
- // Check if the dependency is an infra container. If it is we can checkpoint
- // the container out of the Pod.
- if c.config.Pod == "" {
- return errors.New("cannot export checkpoints of containers with dependencies")
- }
-
- pod, err := c.runtime.state.Pod(c.config.Pod)
- if err != nil {
- return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
- }
- infraID, err := pod.InfraContainerID()
+ cgroupManager := c.CgroupManager()
+ switch {
+ case c.config.NoCgroups:
+ return "", nil
+ case c.config.CgroupsMode == cgroupSplit:
+ selfCgroup, err := utils.GetOwnCgroupDisallowRoot()
if err != nil {
- return fmt.Errorf("cannot retrieve infra container ID for pod %s: %w", c.config.Pod, err)
+ return "", err
}
- if c.Dependencies()[0] != infraID {
- return errors.New("cannot export checkpoints of containers with dependencies")
+ return filepath.Join(selfCgroup, fmt.Sprintf("libpod-payload-%s", c.ID())), nil
+ case cgroupManager == config.SystemdCgroupsManager:
+ // When the OCI runtime is set to use Systemd as a cgroup manager, it
+ // expects cgroups to be passed as follows:
+ // slice:prefix:name
+ systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
+ logrus.Debugf("Setting Cgroups for container %s to %s", c.ID(), systemdCgroups)
+ return systemdCgroups, nil
+ case (rootless.IsRootless() && (cgroupManager == config.CgroupfsCgroupsManager || !unified)):
+ if c.config.CgroupParent == "" || !isRootlessCgroupSet(c.config.CgroupParent) {
+ return "", nil
}
+ fallthrough
+ case cgroupManager == config.CgroupfsCgroupsManager:
+ cgroupPath := filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID()))
+ logrus.Debugf("Setting Cgroup path for container %s to %s", c.ID(), cgroupPath)
+ return cgroupPath, nil
+ default:
+ return "", fmt.Errorf("invalid cgroup manager %s requested: %w", cgroupManager, define.ErrInvalidArg)
}
- if len(c.Dependencies()) > 1 {
- return errors.New("cannot export checkpoints of containers with dependencies")
- }
- logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
-
- includeFiles := []string{
- "artifacts",
- metadata.DevShmCheckpointTar,
- metadata.ConfigDumpFile,
- metadata.SpecDumpFile,
- metadata.NetworkStatusFile,
- stats.StatsDump,
- }
+}
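The systemd branch above produces names of the form slice:prefix:name; a quick standalone sketch of that formatting (values are illustrative):

    package main

    import (
    	"fmt"
    	"path"
    )

    // systemdCgroup renders the slice:prefix:name form the OCI runtime
    // expects when systemd is the cgroup manager.
    func systemdCgroup(cgroupParent, ctrID string) string {
    	return fmt.Sprintf("%s:libpod:%s", path.Base(cgroupParent), ctrID)
    }

    func main() {
    	fmt.Println(systemdCgroup("machine.slice", "0123456789ab"))
    	// Output: machine.slice:libpod:0123456789ab
    }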
- if c.LogDriver() == define.KubernetesLogging ||
- c.LogDriver() == define.JSONLogging {
- includeFiles = append(includeFiles, "ctr.log")
- }
- if options.PreCheckPoint {
- includeFiles = append(includeFiles, preCheckpointDir)
- } else {
- includeFiles = append(includeFiles, metadata.CheckpointDirectory)
- }
- // Get root file-system changes included in the checkpoint archive
- var addToTarFiles []string
- if !options.IgnoreRootfs {
- // To correctly track deleted files, let's go through the output of 'podman diff'
- rootFsChanges, err := c.runtime.GetDiff("", c.ID(), define.DiffContainer)
+// If the container is rootless, set up the slirp4netns network
+func (c *Container) setupRootlessNetwork() error {
+ // set up slirp4netns again because slirp4netns will die when conmon exits
+ if c.config.NetMode.IsSlirp4netns() {
+ err := c.runtime.setupSlirp4netns(c, c.state.NetNS)
if err != nil {
- return fmt.Errorf("error exporting root file-system diff for %q: %w", c.ID(), err)
+ return err
}
+ }
- // assign with "=" so the outer addToTarFiles (used by the cleanup loop
- // below) is populated instead of a shadowed copy
- addToTarFiles, err = crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
+ // set up rootlesskit port forwarder again since it dies when conmon exits
+ // we use rootlesskit port forwarder only as rootless and when bridge network is used
+ if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
+ err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path(), c.state.NetworkStatus)
if err != nil {
return err
}
-
- includeFiles = append(includeFiles, addToTarFiles...)
}
+ return nil
+}
- // Folder containing archived volumes that will be included in the export
- expVolDir := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
-
- // Create an archive for each volume associated with the container
- if !options.IgnoreVolumes {
- if err := os.MkdirAll(expVolDir, 0700); err != nil {
- return fmt.Errorf("error creating volumes export directory %q: %w", expVolDir, err)
- }
-
- for _, v := range c.config.NamedVolumes {
- volumeTarFilePath := filepath.Join(metadata.CheckpointVolumesDirectory, v.Name+".tar")
- volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)
-
- volumeTarFile, err := os.Create(volumeTarFileFullPath)
- if err != nil {
- return fmt.Errorf("error creating %q: %w", volumeTarFileFullPath, err)
- }
-
- volume, err := c.runtime.GetVolume(v.Name)
- if err != nil {
- return err
- }
+func openDirectory(path string) (fd int, err error) {
+ return unix.Open(path, unix.O_RDONLY|unix.O_PATH, 0)
+}
- mp, err := volume.MountPoint()
- if err != nil {
+func (c *Container) addNetworkNamespace(g *generate.Generator) error {
+ if c.config.CreateNetNS {
+ if c.config.PostConfigureNetNS {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil {
return err
}
- if mp == "" {
- return fmt.Errorf("volume %s is not mounted, cannot export: %w", volume.Name(), define.ErrInternal)
- }
-
- input, err := archive.TarWithOptions(mp, &archive.TarOptions{
- Compression: archive.Uncompressed,
- IncludeSourceDir: true,
- })
- if err != nil {
- return fmt.Errorf("error reading volume directory %q: %w", v.Dest, err)
- }
-
- _, err = io.Copy(volumeTarFile, input)
- if err != nil {
+ } else {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil {
return err
}
- volumeTarFile.Close()
-
- includeFiles = append(includeFiles, volumeTarFilePath)
}
}
-
- input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
- Compression: options.Compression,
- IncludeSourceDir: true,
- IncludeFiles: includeFiles,
- })
-
- if err != nil {
- return fmt.Errorf("error reading checkpoint directory %q: %w", c.ID(), err)
- }
-
- outFile, err := os.Create(options.TargetFile)
- if err != nil {
- return fmt.Errorf("error creating checkpoint export file %q: %w", options.TargetFile, err)
- }
- defer outFile.Close()
-
- if err := os.Chmod(options.TargetFile, 0600); err != nil {
- return err
- }
-
- _, err = io.Copy(outFile, input)
- if err != nil {
- return err
- }
-
- for _, file := range addToTarFiles {
- os.Remove(filepath.Join(c.bundlePath(), file))
- }
-
- if !options.IgnoreVolumes {
- os.RemoveAll(expVolDir)
- }
-
- return nil
-}
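The export above boils down to a filtered tar of the bundle directory; a hedged sketch using the archive helpers this file already imports (the paths and file list are illustrative):

    package main

    import (
    	"io"
    	"os"

    	"github.com/containers/storage/pkg/archive"
    )

    func main() {
    	// Tar only the selected checkpoint files out of the bundle dir.
    	input, err := archive.TarWithOptions("/tmp/bundle", &archive.TarOptions{
    		Compression:      archive.Uncompressed,
    		IncludeSourceDir: true,
    		IncludeFiles:     []string{"artifacts", "config.dump", "spec.dump"},
    	})
    	if err != nil {
    		panic(err)
    	}
    	out, err := os.Create("/tmp/checkpoint.tar")
    	if err != nil {
    		panic(err)
    	}
    	defer out.Close()
    	if _, err := io.Copy(out, input); err != nil {
    		panic(err)
    	}
    }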
-
-func (c *Container) checkpointRestoreSupported(version int) error {
- if !criu.CheckForCriu(version) {
- return fmt.Errorf("checkpoint/restore requires at least CRIU %d", version)
- }
- if !c.ociRuntime.SupportsCheckpoint() {
- return errors.New("configured runtime does not support checkpoint/restore")
- }
return nil
}
-func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
- if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
- return nil, 0, err
- }
-
- if c.state.State != define.ContainerStateRunning {
- return nil, 0, fmt.Errorf("%q is not running, cannot checkpoint: %w", c.state.State, define.ErrCtrStateInvalid)
- }
-
- if c.AutoRemove() && options.TargetFile == "" {
- return nil, 0, errors.New("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
- }
-
- if err := c.resolveCheckpointImageName(&options); err != nil {
- return nil, 0, err
- }
-
- if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
- return nil, 0, err
- }
-
- // Setting CheckpointLog early in case there is a failure.
- c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
- c.state.CheckpointPath = c.CheckpointPath()
-
- runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
- if err != nil {
- return nil, 0, err
- }
-
- // Keep the content of /dev/shm directory
- if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
- shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
-
- shmDirTarFile, err := os.Create(shmDirTarFileFullPath)
- if err != nil {
- return nil, 0, err
- }
- defer shmDirTarFile.Close()
-
- input, err := archive.TarWithOptions(c.config.ShmDir, &archive.TarOptions{
- Compression: archive.Uncompressed,
- IncludeSourceDir: true,
- })
- if err != nil {
- return nil, 0, err
- }
-
- if _, err = io.Copy(shmDirTarFile, input); err != nil {
- return nil, 0, err
- }
- }
-
- // Save network.status. This is needed to restore the container with
- // the same IP. Currently limited to one IP address in a container
- // with one interface.
- // FIXME: will this break something?
- if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
- return nil, 0, err
- }
-
- defer c.newContainerEvent(events.Checkpoint)
-
- // There is a bug in CRIU: https://github.com/checkpoint-restore/criu/issues/116
- // We have to change the symbolic link from an absolute to a relative path
- if options.WithPrevious {
- os.Remove(path.Join(c.CheckpointPath(), "parent"))
- if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
- return nil, 0, err
- }
- }
-
- if options.TargetFile != "" {
- if err := c.exportCheckpoint(options); err != nil {
- return nil, 0, err
- }
- } else {
- if err := c.createCheckpointImage(ctx, options); err != nil {
- return nil, 0, err
- }
- }
-
- logrus.Debugf("Checkpointed container %s", c.ID())
-
- if !options.KeepRunning && !options.PreCheckPoint {
- c.state.State = define.ContainerStateStopped
- c.state.Checkpointed = true
- c.state.CheckpointedTime = time.Now()
- c.state.Restored = false
- c.state.RestoredTime = time.Time{}
-
- // Clean up Storage and Network
- if err := c.cleanup(ctx); err != nil {
- return nil, 0, err
- }
- }
-
- criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
- if !options.PrintStats {
- return nil, nil
- }
- statsDirectory, err := os.Open(c.bundlePath())
- if err != nil {
- return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
- }
-
- dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
- if err != nil {
- return nil, fmt.Errorf("displaying checkpointing statistics not possible: %w", err)
- }
-
- return &define.CRIUCheckpointRestoreStatistics{
- FreezingTime: dumpStatistics.GetFreezingTime(),
- FrozenTime: dumpStatistics.GetFrozenTime(),
- MemdumpTime: dumpStatistics.GetMemdumpTime(),
- MemwriteTime: dumpStatistics.GetMemwriteTime(),
- PagesScanned: dumpStatistics.GetPagesScanned(),
- PagesWritten: dumpStatistics.GetPagesWritten(),
- }, nil
- }()
- if err != nil {
- return nil, 0, err
- }
-
- if !options.Keep && !options.PreCheckPoint {
- cleanup := []string{
- "dump.log",
- stats.StatsDump,
- metadata.ConfigDumpFile,
- metadata.SpecDumpFile,
- }
- for _, del := range cleanup {
- file := filepath.Join(c.bundlePath(), del)
- if err := os.Remove(file); err != nil {
- logrus.Debugf("Unable to remove file %s", file)
- }
+func (c *Container) addSystemdMounts(g *generate.Generator) error {
+ if c.Systemd() {
+ if err := c.setupSystemd(g.Mounts(), *g); err != nil {
+ return fmt.Errorf("adding systemd-specific mounts: %w", err)
}
- // The file has been deleted. Do not mention it.
- c.state.CheckpointLog = ""
- }
-
- c.state.FinishedTime = time.Now()
- return criuStatistics, runtimeCheckpointDuration, c.save()
-}
-
-func (c *Container) generateContainerSpec() error {
- // Make sure the newly created config.json exists on disk
-
- // NewFromSpec() is deprecated according to its comment
- // however the recommended replacement just causes a nil map panic
- //nolint:staticcheck
- g := generate.NewFromSpec(c.config.Spec)
-
- if err := c.saveSpec(g.Config); err != nil {
- return fmt.Errorf("saving imported container specification for restore failed: %w", err)
}
-
return nil
}
-func (c *Container) importCheckpointImage(ctx context.Context, imageID string) error {
- img, _, err := c.Runtime().LibimageRuntime().LookupImage(imageID, nil)
- if err != nil {
- return err
- }
-
- mountPoint, err := img.Mount(ctx, nil, "")
- defer func() {
- if err := c.unmount(true); err != nil {
- logrus.Errorf("Failed to unmount container: %v", err)
+func (c *Container) addSharedNamespaces(g *generate.Generator) error {
+ if c.config.IPCNsCtr != "" {
+ if err := c.addNamespaceContainer(g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
+ return err
}
- }()
- if err != nil {
- return err
- }
-
- // Import all checkpoint files except ConfigDumpFile and SpecDumpFile. We
- // generate new container config files to enable specifying a new
- // container name.
- checkpoint := []string{
- "artifacts",
- metadata.CheckpointDirectory,
- metadata.CheckpointVolumesDirectory,
- metadata.DevShmCheckpointTar,
- metadata.RootFsDiffTar,
- metadata.DeletedFilesFile,
- metadata.PodOptionsFile,
- metadata.PodDumpFile,
}
-
- for _, name := range checkpoint {
- src := filepath.Join(mountPoint, name)
- dst := filepath.Join(c.bundlePath(), name)
- if err := archive.NewDefaultArchiver().CopyWithTar(src, dst); err != nil {
- logrus.Debugf("Can't import '%s' from checkpoint image", name)
+ if c.config.MountNsCtr != "" {
+ if err := c.addNamespaceContainer(g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
+ return err
}
}
-
- return c.generateContainerSpec()
-}
-
-func (c *Container) importCheckpointTar(input string) error {
- if err := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input); err != nil {
- return err
- }
-
- return c.generateContainerSpec()
-}
-
-func (c *Container) importPreCheckpoint(input string) error {
- archiveFile, err := os.Open(input)
- if err != nil {
- return fmt.Errorf("failed to open pre-checkpoint archive for import: %w", err)
- }
-
- defer archiveFile.Close()
-
- err = archive.Untar(archiveFile, c.bundlePath(), nil)
- if err != nil {
- return fmt.Errorf("unpacking of pre-checkpoint archive %s failed: %w", input, err)
- }
- return nil
-}
-
-func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
- minCriuVersion := func() int {
- if options.Pod == "" {
- return criu.MinCriuVersion
+ if c.config.NetNsCtr != "" {
+ if err := c.addNamespaceContainer(g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
+ return err
}
- return criu.PodCriuVersion
- }()
- if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
- return nil, 0, err
- }
-
- if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
- return nil, 0, fmt.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
- }
-
- if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
- return nil, 0, fmt.Errorf("container %s is running or paused, cannot restore: %w", c.ID(), define.ErrCtrStateInvalid)
}
-
- if options.ImportPrevious != "" {
- if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
- return nil, 0, err
+ if c.config.PIDNsCtr != "" {
+ if err := c.addNamespaceContainer(g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
+ return err
}
}
-
- if options.TargetFile != "" {
- if err := c.importCheckpointTar(options.TargetFile); err != nil {
- return nil, 0, err
+ if c.config.UserNsCtr != "" {
+ if err := c.addNamespaceContainer(g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
+ return err
}
- } else if options.CheckpointImageID != "" {
- if err := c.importCheckpointImage(ctx, options.CheckpointImageID); err != nil {
- return nil, 0, err
+ if len(g.Config.Linux.UIDMappings) == 0 {
+ // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping
+ g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
+ g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
}
}
- // Let's try to stat() CRIU's inventory file. If it does not exist, it makes
- // no sense to try a restore. This is a minimal check that a checkpoint exists.
- if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
- return nil, 0, fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err)
- }
-
- if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
- return nil, 0, err
- }
-
- // Setting RestoreLog early in case there is a failure.
- c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
- c.state.CheckpointPath = c.CheckpointPath()
-
- // Read network configuration from checkpoint
- var netStatus map[string]types.StatusBlock
- _, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
+ availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
if err != nil {
- logrus.Infof("Failed to unmarshal network status, cannot restore the same ip/mac: %v", err)
- }
- // If the restored container should get a new name, the IP address of
- // the container will not be restored. This assumes that if a new name is
- // specified, the container is restored multiple times.
- // TODO: This implicit restoring with or without IP depending on an
- // unrelated restore parameter (--name) does not seem like the
- // best solution.
- if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
- // The file with the network.status does exist. Let's restore the
- // container with the same networks settings as during checkpointing.
- networkOpts, err := c.networks()
- if err != nil {
- return nil, 0, err
- }
-
- netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
- for network, perNetOpts := range networkOpts {
- // unset mac and ips before we start adding the ones from the status
- perNetOpts.StaticMAC = nil
- perNetOpts.StaticIPs = nil
- for name, netInt := range netStatus[network].Interfaces {
- perNetOpts.InterfaceName = name
- if !options.IgnoreStaticMAC {
- perNetOpts.StaticMAC = netInt.MacAddress
- }
- if !options.IgnoreStaticIP {
- for _, netAddress := range netInt.Subnets {
- perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
- }
- }
- // Normally interfaces have a length of 1; only some special CNI configs yield more.
- // For now just use the first interface to get the IPs; this should be good enough for most cases.
- break
- }
- netOpts[network] = perNetOpts
- }
- c.perNetworkOpts = netOpts
- }
-
- defer func() {
- if retErr != nil {
- if err := c.cleanup(ctx); err != nil {
- logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
- }
+ if os.IsNotExist(err) {
+ // The kernel-provided files only exist if user namespaces are supported
+ logrus.Debugf("User or group ID mappings not available: %s", err)
+ } else {
+ return err
}
- }()
-
- if err := c.prepare(); err != nil {
- return nil, 0, err
- }
-
- // Read config
- jsonPath := filepath.Join(c.bundlePath(), "config.json")
- logrus.Debugf("generate.NewFromFile at %v", jsonPath)
- g, err := generate.NewFromFile(jsonPath)
- if err != nil {
- logrus.Debugf("generate.NewFromFile failed with %v", err)
- return nil, 0, err
- }
-
- // Restoring from an import means that we are doing migration
- if options.TargetFile != "" || options.CheckpointImageID != "" {
- g.SetRootPath(c.state.Mountpoint)
+ } else {
+ g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
+ g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
}
- // We want to have the same network namespace as before.
- if c.config.CreateNetNS {
- netNSPath := ""
- if !c.config.PostConfigureNetNS {
- netNSPath = c.state.NetNS.Path()
- }
+ // Hostname handling:
+ // If we have a UTS namespace, set Hostname in the OCI spec.
+ // Set the HOSTNAME environment variable unless explicitly overridden by
+ // the user (already present in OCI spec). If we don't have a UTS ns,
+ // set it to the host's hostname instead.
+ hostname := c.Hostname()
+ foundUTS := false
- if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), netNSPath); err != nil {
- return nil, 0, err
+ for _, i := range c.config.Spec.Linux.Namespaces {
+ if i.Type == spec.UTSNamespace && i.Path == "" {
+ foundUTS = true
+ g.SetHostname(hostname)
+ break
}
}
-
- if options.Pod != "" {
- // Running in a Pod means that we have to change all namespace settings to
- // the ones from the infrastructure container.
- pod, err := c.runtime.LookupPod(options.Pod)
- if err != nil {
- return nil, 0, fmt.Errorf("pod %q cannot be retrieved: %w", options.Pod, err)
- }
-
- infraContainer, err := pod.InfraContainer()
+ if !foundUTS {
+ tmpHostname, err := os.Hostname()
if err != nil {
- return nil, 0, fmt.Errorf("cannot retrieved infra container from pod %q: %w", options.Pod, err)
- }
-
- infraContainer.lock.Lock()
- if err := infraContainer.syncContainer(); err != nil {
- infraContainer.lock.Unlock()
- return nil, 0, fmt.Errorf("error syncing infrastructure container %s status: %w", infraContainer.ID(), err)
- }
- if infraContainer.state.State != define.ContainerStateRunning {
- if err := infraContainer.initAndStart(ctx); err != nil {
- infraContainer.lock.Unlock()
- return nil, 0, fmt.Errorf("error starting infrastructure container %s status: %w", infraContainer.ID(), err)
- }
- }
- infraContainer.lock.Unlock()
-
- if c.config.IPCNsCtr != "" {
- nsPath, err := infraContainer.namespacePath(IPCNS)
- if err != nil {
- return nil, 0, fmt.Errorf("cannot retrieve IPC namespace path for Pod %q: %w", options.Pod, err)
- }
- if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
- return nil, 0, err
- }
- }
-
- if c.config.NetNsCtr != "" {
- nsPath, err := infraContainer.namespacePath(NetNS)
- if err != nil {
- return nil, 0, fmt.Errorf("cannot retrieve network namespace path for Pod %q: %w", options.Pod, err)
- }
- if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
- return nil, 0, err
- }
- }
-
- if c.config.PIDNsCtr != "" {
- nsPath, err := infraContainer.namespacePath(PIDNS)
- if err != nil {
- return nil, 0, fmt.Errorf("cannot retrieve PID namespace path for Pod %q: %w", options.Pod, err)
- }
- if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
- return nil, 0, err
- }
- }
-
- if c.config.UTSNsCtr != "" {
- nsPath, err := infraContainer.namespacePath(UTSNS)
- if err != nil {
- return nil, 0, fmt.Errorf("cannot retrieve UTS namespace path for Pod %q: %w", options.Pod, err)
- }
- if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
- return nil, 0, err
- }
- }
-
- if c.config.CgroupNsCtr != "" {
- nsPath, err := infraContainer.namespacePath(CgroupNS)
- if err != nil {
- return nil, 0, fmt.Errorf("cannot retrieve Cgroup namespace path for Pod %q: %w", options.Pod, err)
- }
- if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
- return nil, 0, err
- }
- }
- }
-
- if err := c.makeBindMounts(); err != nil {
- return nil, 0, err
- }
-
- if options.TargetFile != "" || options.CheckpointImageID != "" {
- for dstPath, srcPath := range c.state.BindMounts {
- newMount := spec.Mount{
- Type: "bind",
- Source: srcPath,
- Destination: dstPath,
- Options: []string{"bind", "private"},
- }
- if c.IsReadOnly() && dstPath != "/dev/shm" {
- newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
- }
- if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
- newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
- }
- if !MountExists(g.Mounts(), dstPath) {
- g.AddMount(newMount)
- }
- }
- }
-
- // Restore /dev/shm content
- if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
- shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
- if _, err := os.Stat(shmDirTarFileFullPath); err != nil {
- logrus.Debug("Container checkpoint doesn't contain dev/shm: ", err.Error())
- } else {
- shmDirTarFile, err := os.Open(shmDirTarFileFullPath)
- if err != nil {
- return nil, 0, err
- }
- defer shmDirTarFile.Close()
-
- if err := archive.UntarUncompressed(shmDirTarFile, c.config.ShmDir, nil); err != nil {
- return nil, 0, err
- }
- }
- }
-
- // Cleanup for a working restore.
- if err := c.removeConmonFiles(); err != nil {
- return nil, 0, err
- }
-
- // Save the OCI spec to disk
- if err := c.saveSpec(g.Config); err != nil {
- return nil, 0, err
- }
-
- // When restoring from an imported archive, allow restoring the content of volumes.
- // Volumes are created in setupContainer()
- if !options.IgnoreVolumes && (options.TargetFile != "" || options.CheckpointImageID != "") {
- for _, v := range c.config.NamedVolumes {
- volumeFilePath := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory, v.Name+".tar")
-
- volumeFile, err := os.Open(volumeFilePath)
- if err != nil {
- return nil, 0, fmt.Errorf("failed to open volume file %s: %w", volumeFilePath, err)
- }
- defer volumeFile.Close()
-
- volume, err := c.runtime.GetVolume(v.Name)
- if err != nil {
- return nil, 0, fmt.Errorf("failed to retrieve volume %s: %w", v.Name, err)
- }
-
- mountPoint, err := volume.MountPoint()
- if err != nil {
- return nil, 0, err
- }
- if mountPoint == "" {
- return nil, 0, fmt.Errorf("unable to import volume %s as it is not mounted: %w", volume.Name(), err)
- }
- if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
- return nil, 0, fmt.Errorf("failed to extract volume %s to %s: %w", volumeFilePath, mountPoint, err)
- }
+ return err
}
+ hostname = tmpHostname
}
-
- // Before actually restarting the container, apply the root file-system changes
- if !options.IgnoreRootfs {
- if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
- return nil, 0, err
- }
-
- if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
- return nil, 0, err
+ needEnv := true
+ for _, checkEnv := range g.Config.Process.Env {
+ if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
+ needEnv = false
+ break
}
}
-
- runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
- if err != nil {
- return nil, 0, err
+ if needEnv {
+ g.AddProcessEnv("HOSTNAME", hostname)
}
- criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
- if !options.PrintStats {
- return nil, nil
- }
- statsDirectory, err := os.Open(c.bundlePath())
- if err != nil {
- return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
- }
-
- restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
- if err != nil {
- return nil, fmt.Errorf("displaying restore statistics not possible: %w", err)
+ if c.config.UTSNsCtr != "" {
+ if err := c.addNamespaceContainer(g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
+ return err
}
-
- return &define.CRIUCheckpointRestoreStatistics{
- PagesCompared: restoreStatistics.GetPagesCompared(),
- PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
- ForkingTime: restoreStatistics.GetForkingTime(),
- RestoreTime: restoreStatistics.GetRestoreTime(),
- PagesRestored: restoreStatistics.GetPagesRestored(),
- }, nil
- }()
- if err != nil {
- return nil, 0, err
}
-
- logrus.Debugf("Restored container %s", c.ID())
-
- c.state.State = define.ContainerStateRunning
- c.state.Checkpointed = false
- c.state.Restored = true
- c.state.CheckpointedTime = time.Time{}
- c.state.RestoredTime = time.Now()
-
- if !options.Keep {
- // Delete all checkpoint related files. At this point, in theory, all files
- // should exist. Still ignoring errors for now as the container should be
- // restored and running. Not erroring out just because some cleanup operation
- // failed. Starting with the checkpoint directory
- err = os.RemoveAll(c.CheckpointPath())
- if err != nil {
- logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
- }
- c.state.CheckpointPath = ""
- err = os.RemoveAll(c.PreCheckPointPath())
- if err != nil {
- logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
- }
- err = os.RemoveAll(c.CheckpointVolumesPath())
- if err != nil {
- logrus.Debugf("Non-fatal: removal of checkpoint volumes directory (%s) failed: %v", c.CheckpointVolumesPath(), err)
- }
- cleanup := [...]string{
- "restore.log",
- "dump.log",
- stats.StatsDump,
- stats.StatsRestore,
- metadata.DevShmCheckpointTar,
- metadata.NetworkStatusFile,
- metadata.RootFsDiffTar,
- metadata.DeletedFilesFile,
- }
- for _, del := range cleanup {
- file := filepath.Join(c.bundlePath(), del)
- err = os.Remove(file)
- if err != nil {
- logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
- }
+ if c.config.CgroupNsCtr != "" {
+ if err := c.addNamespaceContainer(g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
+ return err
}
- c.state.CheckpointLog = ""
- c.state.RestoreLog = ""
}
- return criuStatistics, runtimeRestoreDuration, c.save()
-}
-
-// Retrieves a container's "root" net namespace container dependency.
-func (c *Container) getRootNetNsDepCtr() (depCtr *Container, err error) {
- containersVisited := map[string]int{c.config.ID: 1}
- nextCtr := c.config.NetNsCtr
- for nextCtr != "" {
- // Make sure we aren't in a loop
- if _, visited := containersVisited[nextCtr]; visited {
- return nil, errors.New("loop encountered while determining net namespace container")
+ if c.config.UserNsCtr == "" && c.config.IDMappings.AutoUserNs {
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
+ return err
}
- containersVisited[nextCtr] = 1
-
- depCtr, err = c.runtime.state.Container(nextCtr)
- if err != nil {
- return nil, fmt.Errorf("error fetching dependency %s of container %s: %w", c.config.NetNsCtr, c.ID(), err)
+ g.ClearLinuxUIDMappings()
+ for _, uidmap := range c.config.IDMappings.UIDMap {
+ g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
}
- // This should never happen without an error
- if depCtr == nil {
- break
+ g.ClearLinuxGIDMappings()
+ for _, gidmap := range c.config.IDMappings.GIDMap {
+ g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
}
- nextCtr = depCtr.config.NetNsCtr
- }
-
- if depCtr == nil {
- return nil, errors.New("unexpected error depCtr is nil without reported error from runtime state")
- }
- return depCtr, nil
-}
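The visited-map loop guard above generalizes to any follow-the-dependency walk; a standalone sketch (the names and the map-based lookup are illustrative):

    package main

    import (
    	"errors"
    	"fmt"
    )

    // rootDep follows next links from start and returns the last container
    // in the chain, erroring out if a dependency cycle is detected.
    func rootDep(next map[string]string, start string) (string, error) {
    	visited := map[string]bool{start: true}
    	cur := start
    	for next[cur] != "" {
    		nxt := next[cur]
    		if visited[nxt] {
    			return "", errors.New("loop encountered while determining net namespace container")
    		}
    		visited[nxt] = true
    		cur = nxt
    	}
    	return cur, nil
    }

    func main() {
    	fmt.Println(rootDep(map[string]string{"a": "b", "b": "c"}, "a")) // c <nil>
    }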
-
-// Ensure standard bind mounts are mounted into all root directories (including chroot directories)
-func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error {
- c.state.BindMounts[mountName] = mountPath
-
- for _, chrootDir := range c.config.ChrootDirs {
- c.state.BindMounts[filepath.Join(chrootDir, mountName)] = mountPath
}
-
return nil
}
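The dummy-mapping workaround above (runc rejects a spec that joins another user namespace but carries no mapping) in isolation, using the same runtime-tools generator — a sketch, not part of the patch:

    package main

    import (
    	"fmt"

    	"github.com/opencontainers/runtime-tools/generate"
    )

    func main() {
    	g, err := generate.New("linux")
    	if err != nil {
    		panic(err)
    	}
    	if len(g.Config.Linux.UIDMappings) == 0 {
    		// A 1:1 mapping of a single ID keeps runc happy when joining
    		// an existing user namespace.
    		g.AddLinuxUIDMapping(0, 0, 1)
    		g.AddLinuxGIDMapping(0, 0, 1)
    	}
    	fmt.Printf("%+v\n", g.Config.Linux.UIDMappings)
    }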
-// Make standard bind mounts to include in the container
-func (c *Container) makeBindMounts() error {
- if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
- return fmt.Errorf("cannot chown run directory: %w", err)
- }
-
- if c.state.BindMounts == nil {
- c.state.BindMounts = make(map[string]string)
- }
- netDisabled, err := c.NetworkDisabled()
- if err != nil {
- return err
- }
-
- if !netDisabled {
- // If /etc/resolv.conf and /etc/hosts exist, delete them so we
- // will recreate them. Only do this if we aren't sharing them with
- // another container.
- if c.config.NetNsCtr == "" {
- if resolvePath, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
- if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("container %s: %w", c.ID(), err)
- }
- delete(c.state.BindMounts, "/etc/resolv.conf")
- }
- if hostsPath, ok := c.state.BindMounts["/etc/hosts"]; ok {
- if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
- return fmt.Errorf("container %s: %w", c.ID(), err)
- }
- delete(c.state.BindMounts, "/etc/hosts")
- }
- }
-
- if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
- // We share a net namespace.
- // We want /etc/resolv.conf and /etc/hosts from the
- // other container, unless the image's own copies are
- // being used.
- depCtr, err := c.getRootNetNsDepCtr()
- if err != nil {
- return fmt.Errorf("error fetching network namespace dependency container for container %s: %w", c.ID(), err)
- }
-
- // We need that container's bind mounts
- bindMounts, err := depCtr.BindMounts()
- if err != nil {
- return fmt.Errorf("error fetching bind mounts from dependency %s of container %s: %w", depCtr.ID(), c.ID(), err)
- }
-
- // The other container may not have a resolv.conf or /etc/hosts
- // If it doesn't, don't copy them
- resolvPath, exists := bindMounts["/etc/resolv.conf"]
- if !c.config.UseImageResolvConf && exists {
- err := c.mountIntoRootDirs("/etc/resolv.conf", resolvPath)
-
- if err != nil {
- return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
- }
- }
-
- // check if dependency container has an /etc/hosts file.
- // It may not have one, so only use it if it does.
- hostsPath, exists := bindMounts[config.DefaultHostsFile]
- if !c.config.UseImageHosts && exists {
- // we cannot use the dependency container lock due to ABBA deadlocks in cleanup()
- lock, err := lockfile.GetLockfile(hostsPath)
- if err != nil {
- return fmt.Errorf("failed to lock hosts file: %w", err)
- }
- lock.Lock()
-
- // add the newly added container to the hosts file
- // we always use 127.0.0.1 as the IP since the containers share the same netns
- err = etchosts.Add(hostsPath, getLocalhostHostEntry(c))
- lock.Unlock()
- if err != nil {
- return fmt.Errorf("error creating hosts file for container %s which depends on container %s: %w", c.ID(), depCtr.ID(), err)
- }
-
- // finally, save it in the new container
- err = c.mountIntoRootDirs(config.DefaultHostsFile, hostsPath)
- if err != nil {
- return fmt.Errorf("error assigning mounts to container %s: %w", c.ID(), err)
- }
- }
-
- if !hasCurrentUserMapped(c) {
- if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
- return err
- }
- if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
- return err
- }
- }
- } else {
- if !c.config.UseImageResolvConf {
- if err := c.generateResolvConf(); err != nil {
- return fmt.Errorf("error creating resolv.conf for container %s: %w", c.ID(), err)
- }
- }
-
- if !c.config.UseImageHosts {
- if err := c.createHosts(); err != nil {
- return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
- }
- }
- }
-
- if c.state.BindMounts["/etc/hosts"] != "" {
- if err := c.relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
- return err
- }
- }
-
- if c.state.BindMounts["/etc/resolv.conf"] != "" {
- if err := c.relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
- return err
- }
- }
- } else if !c.config.UseImageHosts && c.state.BindMounts["/etc/hosts"] == "" {
- if err := c.createHosts(); err != nil {
- return fmt.Errorf("error creating hosts file for container %s: %w", c.ID(), err)
- }
- }
-
- if c.config.ShmDir != "" {
- // If ShmDir has a value, SHM is always added when we mount the container
- c.state.BindMounts["/dev/shm"] = c.config.ShmDir
- }
-
- if c.config.Passwd == nil || *c.config.Passwd {
- newPasswd, newGroup, err := c.generatePasswdAndGroup()
- if err != nil {
- return fmt.Errorf("error creating temporary passwd file for container %s: %w", c.ID(), err)
- }
- if newPasswd != "" {
- // Make /etc/passwd
- // If it already exists, delete so we can recreate
- delete(c.state.BindMounts, "/etc/passwd")
- c.state.BindMounts["/etc/passwd"] = newPasswd
- }
- if newGroup != "" {
- // Make /etc/group
- // If it already exists, delete so we can recreate
- delete(c.state.BindMounts, "/etc/group")
- c.state.BindMounts["/etc/group"] = newGroup
- }
- }
-
- // Make /etc/hostname
- // This should never change, so no need to recreate if it exists
- if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
- hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
- if err != nil {
- return fmt.Errorf("error creating hostname file for container %s: %w", c.ID(), err)
- }
- c.state.BindMounts["/etc/hostname"] = hostnamePath
- }
-
- // Make /etc/localtime
- ctrTimezone := c.Timezone()
- if ctrTimezone != "" {
- // validate the format of the timezone specified if it's not "local"
- if ctrTimezone != "local" {
- _, err = time.LoadLocation(ctrTimezone)
- if err != nil {
- return fmt.Errorf("error finding timezone for container %s: %w", c.ID(), err)
- }
- }
- if _, ok := c.state.BindMounts["/etc/localtime"]; !ok {
- var zonePath string
- if ctrTimezone == "local" {
- zonePath, err = filepath.EvalSymlinks("/etc/localtime")
- if err != nil {
- return fmt.Errorf("error finding local timezone for container %s: %w", c.ID(), err)
+func (c *Container) addRootPropagation(g *generate.Generator, mounts []spec.Mount) error {
+ // Determine property of RootPropagation based on volume properties. If
+ // a volume is shared, then keep root propagation shared. This should
+ // work for slave and private volumes too.
+ //
+ // For slave volumes, it can be either [r]shared/[r]slave.
+ //
+ // For private volumes any root propagation value should work.
+ rootPropagation := ""
+ for _, m := range mounts {
+ for _, opt := range m.Options {
+ switch opt {
+ case MountShared, MountRShared:
+ if rootPropagation != MountShared && rootPropagation != MountRShared {
+ rootPropagation = MountShared
}
- } else {
- zone := filepath.Join("/usr/share/zoneinfo", ctrTimezone)
- zonePath, err = filepath.EvalSymlinks(zone)
- if err != nil {
- return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
+ case MountSlave, MountRSlave:
+ if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
+ rootPropagation = MountRSlave
}
}
- localtimePath, err := c.copyTimezoneFile(zonePath)
- if err != nil {
- return fmt.Errorf("error setting timezone for container %s: %w", c.ID(), err)
- }
- c.state.BindMounts["/etc/localtime"] = localtimePath
- }
- }
-
- _, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"]
- if !hasRunContainerenv {
- // check in the spec mounts
- for _, m := range c.config.Spec.Mounts {
- if m.Destination == "/run/.containerenv" || m.Destination == "/run" {
- hasRunContainerenv = true
- break
- }
}
}
-
- // Make .containerenv if it does not exist
- if !hasRunContainerenv {
- containerenv := c.runtime.graphRootMountedFlag(c.config.Spec.Mounts)
- isRootless := 0
- if rootless.IsRootless() {
- isRootless = 1
- }
- imageID, imageName := c.Image()
-
- if c.Privileged() {
- // Populate the .containerenv with container information
- containerenv = fmt.Sprintf(`engine="podman-%s"
-name=%q
-id=%q
-image=%q
-imageid=%q
-rootless=%d
-%s`, version.Version.String(), c.Name(), c.ID(), imageName, imageID, isRootless, containerenv)
- }
- containerenvPath, err := c.writeStringToRundir(".containerenv", containerenv)
- if err != nil {
- return fmt.Errorf("error creating containerenv file for container %s: %w", c.ID(), err)
- }
- c.state.BindMounts["/run/.containerenv"] = containerenvPath
- }
-
- // Add Subscription Mounts
- subscriptionMounts := subscriptions.MountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
- for _, mount := range subscriptionMounts {
- if _, ok := c.state.BindMounts[mount.Destination]; !ok {
- c.state.BindMounts[mount.Destination] = mount.Source
- }
- }
-
- // Secrets are mounted by getting the secret data from the secrets manager,
- // copying the data into the container's static dir,
- // then mounting the copied dir into /run/secrets.
- // The secrets mounting must come after subscription mounts, since subscription mounts
- // create the /run/secrets dir in the container, where we mount as well.
- if len(c.Secrets()) > 0 {
- // create /run/secrets if the subscription mounts did not create it
- if err := c.createSecretMountDir(); err != nil {
- return fmt.Errorf("error creating secrets mount: %w", err)
- }
- for _, secret := range c.Secrets() {
- secretFileName := secret.Name
- base := "/run/secrets"
- if secret.Target != "" {
- secretFileName = secret.Target
- // If an absolute path is given for the target, remove the base.
- if filepath.IsAbs(secretFileName) {
- base = ""
- }
- }
- src := filepath.Join(c.config.SecretsPath, secret.Name)
- dest := filepath.Join(base, secretFileName)
- c.state.BindMounts[dest] = src
+ if rootPropagation != "" {
+ logrus.Debugf("Set root propagation to %q", rootPropagation)
+ if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
+ return err
}
}
-
return nil
}
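A standalone restatement of the propagation policy above (same logic, extracted here for readability; not part of the patch):

    package main

    import "fmt"

    // rootPropagation mirrors addRootPropagation: any shared mount forces
    // "shared"; otherwise any slave mount forces "rslave"; else leave unset.
    func rootPropagation(mountOptions [][]string) string {
    	prop := ""
    	for _, opts := range mountOptions {
    		for _, opt := range opts {
    			switch opt {
    			case "shared", "rshared":
    				if prop != "shared" && prop != "rshared" {
    					prop = "shared"
    				}
    			case "slave", "rslave":
    				if prop != "shared" && prop != "rshared" && prop != "slave" && prop != "rslave" {
    					prop = "rslave"
    				}
    			}
    		}
    	}
    	return prop
    }

    func main() {
    	fmt.Println(rootPropagation([][]string{{"rbind", "rslave"}, {"bind", "shared"}})) // shared
    }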
-// generateResolvConf generates a container's resolv.conf
-func (c *Container) generateResolvConf() error {
- var (
- networkNameServers []string
- networkSearchDomains []string
- )
+func (c *Container) setProcessLabel(g *generate.Generator) {
+ g.SetProcessSelinuxLabel(c.ProcessLabel())
+}
- netStatus := c.getNetworkStatus()
- for _, status := range netStatus {
- if status.DNSServerIPs != nil {
- for _, nsIP := range status.DNSServerIPs {
- networkNameServers = append(networkNameServers, nsIP.String())
- }
- logrus.Debugf("Adding nameserver(s) from network status of '%q'", status.DNSServerIPs)
- }
- if status.DNSSearchDomains != nil {
- networkSearchDomains = append(networkSearchDomains, status.DNSSearchDomains...)
- logrus.Debugf("Adding search domain(s) from network status of '%q'", status.DNSSearchDomains)
- }
- }
+func (c *Container) setMountLabel(g *generate.Generator) {
+ g.SetLinuxMountLabel(c.MountLabel())
+}
- ipv6, err := c.checkForIPv6(netStatus)
+func (c *Container) setCgroupsPath(g *generate.Generator) error {
+ cgroupPath, err := c.getOCICgroupPath()
if err != nil {
return err
}
-
- nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers)+len(c.config.DNSServer))
- nameservers = append(nameservers, c.runtime.config.Containers.DNSServers...)
- for _, ip := range c.config.DNSServer {
- nameservers = append(nameservers, ip.String())
- }
- // If the user provided DNS, it trumps all; then dnsmasq; then resolv.conf
- var search []string
- keepHostServers := false
- if len(nameservers) == 0 {
- keepHostServers = true
- // first add the nameservers from the networks status
- nameservers = networkNameServers
- // when we add network dns server we also have to add the search domains
- search = networkSearchDomains
- // slirp4netns has a built-in DNS forwarder.
- if c.config.NetMode.IsSlirp4netns() {
- slirp4netnsDNS, err := GetSlirp4netnsDNS(c.slirp4netnsSubnet)
- if err != nil {
- logrus.Warn("Failed to determine Slirp4netns DNS: ", err.Error())
- } else {
- nameservers = append(nameservers, slirp4netnsDNS.String())
- }
- }
- }
-
- if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches) > 0 {
- customSearch := make([]string, 0, len(c.config.DNSSearch)+len(c.runtime.config.Containers.DNSSearches))
- customSearch = append(customSearch, c.runtime.config.Containers.DNSSearches...)
- customSearch = append(customSearch, c.config.DNSSearch...)
- search = customSearch
- }
-
- options := make([]string, 0, len(c.config.DNSOption)+len(c.runtime.config.Containers.DNSOptions))
- options = append(options, c.runtime.config.Containers.DNSOptions...)
- options = append(options, c.config.DNSOption...)
-
- destPath := filepath.Join(c.state.RunDir, "resolv.conf")
-
- if err := resolvconf.New(&resolvconf.Params{
- IPv6Enabled: ipv6,
- KeepHostServers: keepHostServers,
- Nameservers: nameservers,
- Namespaces: c.config.Spec.Linux.Namespaces,
- Options: options,
- Path: destPath,
- Searches: search,
- }); err != nil {
- return fmt.Errorf("error building resolv.conf for container %s: %w", c.ID(), err)
- }
-
- return c.bindMountRootFile(destPath, resolvconf.DefaultResolvConf)
+ g.SetLinuxCgroupsPath(cgroupPath)
+ return nil
}
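For reference, the nameserver precedence encoded in generateResolvConf above, reduced to its core decision (a sketch; the parameter names are illustrative): explicitly configured servers win outright, otherwise the network-status servers are used and the host's own servers are kept as a fallback.

    package main

    import "fmt"

    // pickNameservers: user/config-provided DNS trumps all; otherwise fall
    // back to the network-provided servers and keep the host's servers too.
    func pickNameservers(userDNS, networkDNS []string) (servers []string, keepHostServers bool) {
    	if len(userDNS) > 0 {
    		return userDNS, false
    	}
    	return networkDNS, true
    }

    func main() {
    	s, keep := pickNameservers(nil, []string{"10.89.0.1"})
    	fmt.Println(s, keep) // [10.89.0.1] true
    }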
-// Check if a container uses IPv6.
-func (c *Container) checkForIPv6(netStatus map[string]types.StatusBlock) (bool, error) {
- for _, status := range netStatus {
- for _, netInt := range status.Interfaces {
- for _, netAddress := range netInt.Subnets {
- // Note: only using To16() does not work since it also returns a valid IP for IPv4 addresses
- if netAddress.IPNet.IP.To4() == nil && netAddress.IPNet.IP.To16() != nil {
- return true, nil
- }
- }
+func (c *Container) addSlirp4netnsDNS(nameservers []string) []string {
+ // slirp4netns has a built-in DNS forwarder.
+ if c.config.NetMode.IsSlirp4netns() {
+ slirp4netnsDNS, err := GetSlirp4netnsDNS(c.slirp4netnsSubnet)
+ if err != nil {
+ logrus.Warn("Failed to determine Slirp4netns DNS: ", err.Error())
+ } else {
+ nameservers = append(nameservers, slirp4netnsDNS.String())
}
}
+ return nameservers
+}
+func (c *Container) isSlirp4netnsIPv6() (bool, error) {
if c.config.NetMode.IsSlirp4netns() {
ctrNetworkSlipOpts := []string{}
if c.config.NetworkOptions != nil {
@@ -2428,804 +630,38 @@ func (c *Container) checkForIPv6(netStatus map[string]types.StatusBlock) (bool,
return false, nil
}
-// Add a new nameserver to the container's resolv.conf, ensuring that it is the
-// first nameserver present.
-// Usable only with running containers.
-func (c *Container) addNameserver(ips []string) error {
- // Take no action if container is not running.
- if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
- return nil
- }
-
- // Do we have a resolv.conf at all?
- path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
- if !ok {
- return nil
- }
-
- if err := resolvconf.Add(path, ips); err != nil {
- return fmt.Errorf("adding new nameserver to container %s resolv.conf: %w", c.ID(), err)
- }
-
- return nil
-}
-
-// Remove an entry from the existing resolv.conf of the container.
-// Usable only with running containers.
-func (c *Container) removeNameserver(ips []string) error {
- // Take no action if container is not running.
- if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
- return nil
- }
-
- // Do we have a resolv.conf at all?
- path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
- if !ok {
- return nil
- }
-
- if err := resolvconf.Remove(path, ips); err != nil {
- return fmt.Errorf("removing nameservers from container %s resolv.conf: %w", c.ID(), err)
- }
-
- return nil
-}
-
-func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
- return etchosts.HostEntries{{IP: "127.0.0.1", Names: []string{c.Hostname(), c.config.Name}}}
-}
-
-// getHostsEntries returns the container ip host entries for the correct netmode
-func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
- var entries etchosts.HostEntries
- names := []string{c.Hostname(), c.config.Name}
- switch {
- case c.config.NetMode.IsBridge():
- entries = etchosts.GetNetworkHostEntries(c.state.NetworkStatus, names...)
- case c.config.NetMode.IsSlirp4netns():
- ip, err := GetSlirp4netnsIP(c.slirp4netnsSubnet)
- if err != nil {
- return nil, err
- }
- entries = etchosts.HostEntries{{IP: ip.String(), Names: names}}
- default:
- // check for net=none
- if !c.config.CreateNetNS {
- for _, ns := range c.config.Spec.Linux.Namespaces {
- if ns.Type == spec.NetworkNamespace {
- if ns.Path == "" {
- entries = etchosts.HostEntries{{IP: "127.0.0.1", Names: names}}
- }
- break
+// hasNetNone checks whether the container is configured with net=none.
+func (c *Container) hasNetNone() bool {
+ if !c.config.CreateNetNS {
+ for _, ns := range c.config.Spec.Linux.Namespaces {
+ if ns.Type == spec.NetworkNamespace {
+ if ns.Path == "" {
+ return true
}
}
}
}
- return entries, nil
-}
-
-func (c *Container) createHosts() error {
- var containerIPsEntries etchosts.HostEntries
- var err error
- // if we configure the netns after the container create we should not add
- // the hosts here since we have no information about the actual ips
- // instead we will add them in c.completeNetworkSetup()
- if !c.config.PostConfigureNetNS {
- containerIPsEntries, err = c.getHostsEntries()
- if err != nil {
- return fmt.Errorf("failed to get container ip host entries: %w", err)
- }
- }
- baseHostFile, err := etchosts.GetBaseHostFile(c.runtime.config.Containers.BaseHostsFile, c.state.Mountpoint)
- if err != nil {
- return err
- }
-
- targetFile := filepath.Join(c.state.RunDir, "hosts")
- err = etchosts.New(&etchosts.Params{
- BaseFile: baseHostFile,
- ExtraHosts: c.config.HostAdd,
- ContainerIPs: containerIPsEntries,
- HostContainersInternalIP: etchosts.GetHostContainersInternalIP(c.runtime.config, c.state.NetworkStatus, c.runtime.network),
- TargetFile: targetFile,
- })
- if err != nil {
- return err
- }
-
- return c.bindMountRootFile(targetFile, config.DefaultHostsFile)
-}
-
-// bindMountRootFile will chown and relabel the source file to make it usable in the container.
-// It will also add the path to the container bind mount map.
-// source is the path on the host, dest is the path in the container.
-func (c *Container) bindMountRootFile(source, dest string) error {
- if err := os.Chown(source, c.RootUID(), c.RootGID()); err != nil {
- return err
- }
- if err := label.Relabel(source, c.MountLabel(), false); err != nil {
- return err
- }
-
- return c.mountIntoRootDirs(dest, source)
-}
-
-// generateGroupEntry generates an entry or entries into /etc/group as
-// required by container configuration.
-// Generally speaking, we will make an entry under two circumstances:
-// 1. The container is started as a specific user:group, and that group is both
-// numeric, and does not already exist in /etc/group.
-// 2. It is requested that Libpod add the group that launched Podman to
-// /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if
-// the group in question already exists in /etc/passwd).
-// Returns group entry (as a string that can be appended to /etc/group) and any
-// error that occurred.
-func (c *Container) generateGroupEntry() (string, error) {
- groupString := ""
-
- // Things we *can't* handle: adding the user we added in
- // generatePasswdEntry to any *existing* groups.
- addedGID := 0
- if c.config.AddCurrentUserPasswdEntry {
- entry, gid, err := c.generateCurrentUserGroupEntry()
- if err != nil {
- return "", err
- }
- groupString += entry
- addedGID = gid
- }
- if c.config.User != "" {
- entry, err := c.generateUserGroupEntry(addedGID)
- if err != nil {
- return "", err
- }
- groupString += entry
- }
-
- return groupString, nil
-}
-
-// Make an entry in /etc/group for the group of the user running podman iff we
-// are rootless.
-func (c *Container) generateCurrentUserGroupEntry() (string, int, error) {
- gid := rootless.GetRootlessGID()
- if gid == 0 {
- return "", 0, nil
- }
-
- g, err := user.LookupGroupId(strconv.Itoa(gid))
- if err != nil {
- return "", 0, fmt.Errorf("failed to get current group: %w", err)
- }
-
- // Look up group name to see if it exists in the image.
- _, err = lookup.GetGroup(c.state.Mountpoint, g.Name)
- if err != runcuser.ErrNoGroupEntries {
- return "", 0, err
- }
-
- // Look up GID to see if it exists in the image.
- _, err = lookup.GetGroup(c.state.Mountpoint, g.Gid)
- if err != runcuser.ErrNoGroupEntries {
- return "", 0, err
- }
-
- // We need to get the username of the rootless user so we can add it to
- // the group.
- username := ""
- uid := rootless.GetRootlessUID()
- if uid != 0 {
- u, err := user.LookupId(strconv.Itoa(uid))
- if err != nil {
- return "", 0, fmt.Errorf("failed to get current user to make group entry: %w", err)
- }
- username = u.Username
- }
-
- // Make the entry.
- return fmt.Sprintf("%s:x:%s:%s\n", g.Name, g.Gid, username), gid, nil
-}
-
-// Make an entry in /etc/group for the group the container was specified to run
-// as.
-func (c *Container) generateUserGroupEntry(addedGID int) (string, error) {
- if c.config.User == "" {
- return "", nil
- }
-
- splitUser := strings.SplitN(c.config.User, ":", 2)
- group := splitUser[0]
- if len(splitUser) > 1 {
- group = splitUser[1]
- }
-
- gid, err := strconv.ParseUint(group, 10, 32)
- if err != nil {
- return "", nil //nolint: nilerr
- }
-
- if addedGID != 0 && addedGID == int(gid) {
- return "", nil
- }
-
- // Check if the group already exists
- _, err = lookup.GetGroup(c.state.Mountpoint, group)
- if err != runcuser.ErrNoGroupEntries {
- return "", err
- }
-
- return fmt.Sprintf("%d:x:%d:%s\n", gid, gid, splitUser[0]), nil
-}
-
-// generatePasswdEntry generates an entry or entries into /etc/passwd as
-// required by container configuration.
-// Generally speaking, we will make an entry under two circumstances:
-// 1. The container is started as a specific user who is not in /etc/passwd.
-// This only triggers if the user is given as a *numeric* ID.
-// 2. It is requested that Libpod add the user that launched Podman to
-// /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if
-// the user in question already exists in /etc/passwd) or the UID to be added
-// is 0).
-// 3. The user specified additional host user accounts to add the the /etc/passwd file
-// Returns password entry (as a string that can be appended to /etc/passwd) and
-// any error that occurred.
-func (c *Container) generatePasswdEntry() (string, error) {
- passwdString := ""
-
- addedUID := 0
- for _, userid := range c.config.HostUsers {
- // Look up User on host
- u, err := util.LookupUser(userid)
- if err != nil {
- return "", err
- }
- entry, err := c.userPasswdEntry(u)
- if err != nil {
- return "", err
- }
- passwdString += entry
- }
- if c.config.AddCurrentUserPasswdEntry {
- entry, uid, _, err := c.generateCurrentUserPasswdEntry()
- if err != nil {
- return "", err
- }
- passwdString += entry
- addedUID = uid
- }
- if c.config.User != "" {
- entry, err := c.generateUserPasswdEntry(addedUID)
- if err != nil {
- return "", err
- }
- passwdString += entry
- }
-
- return passwdString, nil
-}
-
-// generateCurrentUserPasswdEntry generates an /etc/passwd entry for the user
-// running the container engine.
-// Returns a passwd entry for the user, and the UID and GID of the added entry.
-func (c *Container) generateCurrentUserPasswdEntry() (string, int, int, error) {
- uid := rootless.GetRootlessUID()
- if uid == 0 {
- return "", 0, 0, nil
- }
-
- u, err := user.LookupId(strconv.Itoa(uid))
- if err != nil {
- return "", 0, 0, fmt.Errorf("failed to get current user: %w", err)
- }
- pwd, err := c.userPasswdEntry(u)
- if err != nil {
- return "", 0, 0, err
- }
-
- return pwd, uid, rootless.GetRootlessGID(), nil
-}
-
-func (c *Container) userPasswdEntry(u *user.User) (string, error) {
- // Look up the user to see if it exists in the container image.
- _, err := lookup.GetUser(c.state.Mountpoint, u.Username)
- if err != runcuser.ErrNoPasswdEntries {
- return "", err
- }
-
- // Look up the UID to see if it exists in the container image.
- _, err = lookup.GetUser(c.state.Mountpoint, u.Uid)
- if err != runcuser.ErrNoPasswdEntries {
- return "", err
- }
-
- // If the user's actual home directory exists, or was mounted in - use
- // that.
- homeDir := c.WorkingDir()
- hDir := u.HomeDir
- for hDir != "/" {
- if MountExists(c.config.Spec.Mounts, hDir) {
- homeDir = u.HomeDir
- break
- }
- hDir = filepath.Dir(hDir)
- }
- if homeDir != u.HomeDir {
- for _, hDir := range c.UserVolumes() {
- if hDir == u.HomeDir {
- homeDir = u.HomeDir
- break
- }
- }
- }
- // Set HOME environment if not already set
- hasHomeSet := false
- for _, s := range c.config.Spec.Process.Env {
- if strings.HasPrefix(s, "HOME=") {
- hasHomeSet = true
- break
- }
- }
- if !hasHomeSet {
- c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", homeDir))
- }
- if c.config.PasswdEntry != "" {
- return c.passwdEntry(u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
- }
-
- return fmt.Sprintf("%s:*:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
-}
-
-// generateUserPasswdEntry generates an /etc/passwd entry for the container user
-// to run in the container.
-// The UID and GID of the added entry will also be returned.
-// Accepts one argument, that being any UID that has already been added to the
-// passwd file by other functions; if it matches the UID we were given, we don't
-// need to do anything.
-func (c *Container) generateUserPasswdEntry(addedUID int) (string, error) {
- var (
- groupspec string
- gid int
- )
- if c.config.User == "" {
- return "", nil
- }
- splitSpec := strings.SplitN(c.config.User, ":", 2)
- userspec := splitSpec[0]
- if len(splitSpec) > 1 {
- groupspec = splitSpec[1]
- }
- // If a non numeric User, then don't generate passwd
- uid, err := strconv.ParseUint(userspec, 10, 32)
- if err != nil {
- return "", nil //nolint: nilerr
- }
-
- if addedUID != 0 && int(uid) == addedUID {
- return "", nil
- }
-
- // Look up the user to see if it exists in the container image
- _, err = lookup.GetUser(c.state.Mountpoint, userspec)
- if err != runcuser.ErrNoPasswdEntries {
- return "", err
- }
-
- if groupspec != "" {
- ugid, err := strconv.ParseUint(groupspec, 10, 32)
- if err == nil {
- gid = int(ugid)
- } else {
- group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
- if err != nil {
- return "", fmt.Errorf("unable to get gid %s from group file: %w", groupspec, err)
- }
- gid = group.Gid
- }
- }
-
- if c.config.PasswdEntry != "" {
- entry := c.passwdEntry(fmt.Sprintf("%d", uid), fmt.Sprintf("%d", uid), fmt.Sprintf("%d", gid), "container user", c.WorkingDir())
- return entry, nil
- }
-
- return fmt.Sprintf("%d:*:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), nil
-}
-
-func (c *Container) passwdEntry(username string, uid, gid, name, homeDir string) string {
- s := c.config.PasswdEntry
- s = strings.ReplaceAll(s, "$USERNAME", username)
- s = strings.ReplaceAll(s, "$UID", uid)
- s = strings.ReplaceAll(s, "$GID", gid)
- s = strings.ReplaceAll(s, "$NAME", name)
- s = strings.ReplaceAll(s, "$HOME", homeDir)
- return s + "\n"
-}
-
-// generatePasswdAndGroup generates container-specific passwd and group files
-// iff g.config.User is a number or we are configured to make a passwd entry for
-// the current user or the user specified HostsUsers
-// Returns path to file to mount at /etc/passwd, path to file to mount at
-// /etc/group, and any error that occurred. If no passwd/group file were
-// required, the empty string will be returned for those path (this may occur
-// even if no error happened).
-// This may modify the mounted container's /etc/passwd and /etc/group instead of
-// making copies to bind-mount in, so we don't break useradd (it wants to make a
-// copy of /etc/passwd and rename the copy to /etc/passwd, which is impossible
-// with a bind mount). This is done in cases where the container is *not*
-// read-only. In this case, the function will return nothing ("", "", nil).
-func (c *Container) generatePasswdAndGroup() (string, string, error) {
- if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" &&
- len(c.config.HostUsers) == 0 {
- return "", "", nil
- }
-
- needPasswd := true
- needGroup := true
-
- // First, check if there's a mount at /etc/passwd or group, we don't
- // want to interfere with user mounts.
- if MountExists(c.config.Spec.Mounts, "/etc/passwd") {
- needPasswd = false
- }
- if MountExists(c.config.Spec.Mounts, "/etc/group") {
- needGroup = false
- }
-
- // Next, check if we already made the files. If we didn't, don't need to
- // do anything more.
- if needPasswd {
- passwdPath := filepath.Join(c.config.StaticDir, "passwd")
- if _, err := os.Stat(passwdPath); err == nil {
- needPasswd = false
- }
- }
- if needGroup {
- groupPath := filepath.Join(c.config.StaticDir, "group")
- if _, err := os.Stat(groupPath); err == nil {
- needGroup = false
- }
- }
-
- // If we don't need a /etc/passwd or /etc/group at this point we can
- // just return.
- if !needPasswd && !needGroup {
- return "", "", nil
- }
-
- passwdPath := ""
- groupPath := ""
-
- ro := c.IsReadOnly()
-
- if needPasswd {
- passwdEntry, err := c.generatePasswdEntry()
- if err != nil {
- return "", "", err
- }
-
- needsWrite := passwdEntry != ""
- switch {
- case ro && needsWrite:
- logrus.Debugf("Making /etc/passwd for container %s", c.ID())
- originPasswdFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
- if err != nil {
- return "", "", fmt.Errorf("error creating path to container %s /etc/passwd: %w", c.ID(), err)
- }
- orig, err := ioutil.ReadFile(originPasswdFile)
- if err != nil && !os.IsNotExist(err) {
- return "", "", err
- }
- passwdFile, err := c.writeStringToStaticDir("passwd", string(orig)+passwdEntry)
- if err != nil {
- return "", "", fmt.Errorf("failed to create temporary passwd file: %w", err)
- }
- if err := os.Chmod(passwdFile, 0644); err != nil {
- return "", "", err
- }
- passwdPath = passwdFile
- case !ro && needsWrite:
- logrus.Debugf("Modifying container %s /etc/passwd", c.ID())
- containerPasswd, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
- if err != nil {
- return "", "", fmt.Errorf("error looking up location of container %s /etc/passwd: %w", c.ID(), err)
- }
-
- f, err := os.OpenFile(containerPasswd, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
- if err != nil {
- return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
- }
- defer f.Close()
-
- if _, err := f.WriteString(passwdEntry); err != nil {
- return "", "", fmt.Errorf("unable to append to container %s /etc/passwd: %w", c.ID(), err)
- }
- default:
- logrus.Debugf("Not modifying container %s /etc/passwd", c.ID())
- }
- }
- if needGroup {
- groupEntry, err := c.generateGroupEntry()
- if err != nil {
- return "", "", err
- }
-
- needsWrite := groupEntry != ""
- switch {
- case ro && needsWrite:
- logrus.Debugf("Making /etc/group for container %s", c.ID())
- originGroupFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
- if err != nil {
- return "", "", fmt.Errorf("error creating path to container %s /etc/group: %w", c.ID(), err)
- }
- orig, err := ioutil.ReadFile(originGroupFile)
- if err != nil && !os.IsNotExist(err) {
- return "", "", err
- }
- groupFile, err := c.writeStringToStaticDir("group", string(orig)+groupEntry)
- if err != nil {
- return "", "", fmt.Errorf("failed to create temporary group file: %w", err)
- }
- if err := os.Chmod(groupFile, 0644); err != nil {
- return "", "", err
- }
- groupPath = groupFile
- case !ro && needsWrite:
- logrus.Debugf("Modifying container %s /etc/group", c.ID())
- containerGroup, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
- if err != nil {
- return "", "", fmt.Errorf("error looking up location of container %s /etc/group: %w", c.ID(), err)
- }
-
- f, err := os.OpenFile(containerGroup, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
- if err != nil {
- return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
- }
- defer f.Close()
-
- if _, err := f.WriteString(groupEntry); err != nil {
- return "", "", fmt.Errorf("unable to append to container %s /etc/group: %w", c.ID(), err)
- }
- default:
- logrus.Debugf("Not modifying container %s /etc/group", c.ID())
- }
- }
-
- return passwdPath, groupPath, nil
-}
-
-func isRootlessCgroupSet(cgroup string) bool {
- // old versions of podman were setting the CgroupParent to CgroupfsDefaultCgroupParent
- // by default. Avoid breaking these versions and check whether the cgroup parent is
- // set to the default and in this case enable the old behavior. It should not be a real
- // problem because the default CgroupParent is usually owned by root so rootless users
- // cannot access it.
- // This check might be lifted in a future version of Podman.
- // Check both that the cgroup or its parent is set to the default value (used by pods).
- return cgroup != CgroupfsDefaultCgroupParent && filepath.Dir(cgroup) != CgroupfsDefaultCgroupParent
-}
-
-func (c *Container) expectPodCgroup() (bool, error) {
- unified, err := cgroups.IsCgroup2UnifiedMode()
- if err != nil {
- return false, err
- }
- cgroupManager := c.CgroupManager()
- switch {
- case c.config.NoCgroups:
- return false, nil
- case cgroupManager == config.SystemdCgroupsManager:
- return !rootless.IsRootless() || unified, nil
- case cgroupManager == config.CgroupfsCgroupsManager:
- return !rootless.IsRootless(), nil
- default:
- return false, fmt.Errorf("invalid cgroup mode %s requested for pods: %w", cgroupManager, define.ErrInvalidArg)
- }
-}
-
-// Get cgroup path in a format suitable for the OCI spec
-func (c *Container) getOCICgroupPath() (string, error) {
- unified, err := cgroups.IsCgroup2UnifiedMode()
- if err != nil {
- return "", err
- }
- cgroupManager := c.CgroupManager()
- switch {
- case c.config.NoCgroups:
- return "", nil
- case c.config.CgroupsMode == cgroupSplit:
- selfCgroup, err := utils.GetOwnCgroupDisallowRoot()
- if err != nil {
- return "", err
- }
- return filepath.Join(selfCgroup, fmt.Sprintf("libpod-payload-%s", c.ID())), nil
- case cgroupManager == config.SystemdCgroupsManager:
- // When the OCI runtime is set to use Systemd as a cgroup manager, it
- // expects cgroups to be passed as follows:
- // slice:prefix:name
- systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
- logrus.Debugf("Setting Cgroups for container %s to %s", c.ID(), systemdCgroups)
- return systemdCgroups, nil
- case (rootless.IsRootless() && (cgroupManager == config.CgroupfsCgroupsManager || !unified)):
- if c.config.CgroupParent == "" || !isRootlessCgroupSet(c.config.CgroupParent) {
- return "", nil
- }
- fallthrough
- case cgroupManager == config.CgroupfsCgroupsManager:
- cgroupPath := filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID()))
- logrus.Debugf("Setting Cgroup path for container %s to %s", c.ID(), cgroupPath)
- return cgroupPath, nil
- default:
- return "", fmt.Errorf("invalid cgroup manager %s requested: %w", cgroupManager, define.ErrInvalidArg)
- }
-}
-
-func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
- localtimeCopy := filepath.Join(c.state.RunDir, "localtime")
- file, err := os.Stat(zonePath)
- if err != nil {
- return "", err
- }
- if file.IsDir() {
- return "", errors.New("invalid timezone: is a directory")
- }
- src, err := os.Open(zonePath)
- if err != nil {
- return "", err
- }
- defer src.Close()
- dest, err := os.Create(localtimeCopy)
- if err != nil {
- return "", err
- }
- defer dest.Close()
- _, err = io.Copy(dest, src)
- if err != nil {
- return "", err
- }
- if err := c.relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
- return "", err
- }
- if err := dest.Chown(c.RootUID(), c.RootGID()); err != nil {
- return "", err
- }
- return localtimeCopy, err
-}
-
-func (c *Container) cleanupOverlayMounts() error {
- return overlay.CleanupContent(c.config.StaticDir)
-}
-
-// Creates and mounts an empty dir to mount secrets into, if it does not already exist
-func (c *Container) createSecretMountDir() error {
- src := filepath.Join(c.state.RunDir, "/run/secrets")
- _, err := os.Stat(src)
- if os.IsNotExist(err) {
- oldUmask := umask.Set(0)
- defer umask.Set(oldUmask)
-
- if err := os.MkdirAll(src, 0755); err != nil {
- return err
- }
- if err := label.Relabel(src, c.config.MountLabel, false); err != nil {
- return err
- }
- if err := os.Chown(src, c.RootUID(), c.RootGID()); err != nil {
- return err
- }
- c.state.BindMounts["/run/secrets"] = src
- return nil
- }
-
- return err
+ return false
}
-// Fix ownership and permissions of the specified volume if necessary.
-func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
- vol, err := c.runtime.state.Volume(v.Name)
- if err != nil {
- return fmt.Errorf("error retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
- }
-
- vol.lock.Lock()
- defer vol.lock.Unlock()
-
- // The volume may need a copy-up. Check the state.
- if err := vol.update(); err != nil {
+func setVolumeAtime(mountPoint string, st os.FileInfo) error {
+ stat := st.Sys().(*syscall.Stat_t)
+ atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec)) //nolint: unconvert
+ if err := os.Chtimes(mountPoint, atime, st.ModTime()); err != nil {
return err
}
-
- // Volumes owned by a volume driver are not chowned - we don't want to
- // mess with a mount not managed by us.
- if vol.state.NeedsChown && !vol.UsesVolumeDriver() {
- vol.state.NeedsChown = false
-
- uid := int(c.config.Spec.Process.User.UID)
- gid := int(c.config.Spec.Process.User.GID)
-
- if c.config.IDMappings.UIDMap != nil {
- p := idtools.IDPair{
- UID: uid,
- GID: gid,
- }
- mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap)
- newPair, err := mappings.ToHost(p)
- if err != nil {
- return fmt.Errorf("error mapping user %d:%d: %w", uid, gid, err)
- }
- uid = newPair.UID
- gid = newPair.GID
- }
-
- vol.state.UIDChowned = uid
- vol.state.GIDChowned = gid
-
- if err := vol.save(); err != nil {
- return err
- }
-
- mountPoint, err := vol.MountPoint()
- if err != nil {
- return err
- }
-
- if err := os.Lchown(mountPoint, uid, gid); err != nil {
- return err
- }
-
- // Make sure the new volume matches the permissions of the target directory.
- // https://github.com/containers/podman/issues/10188
- st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest))
- if err == nil {
- if stat, ok := st.Sys().(*syscall.Stat_t); ok {
- if err := os.Lchown(mountPoint, int(stat.Uid), int(stat.Gid)); err != nil {
- return err
- }
- }
- if err := os.Chmod(mountPoint, st.Mode()); err != nil {
- return err
- }
- stat := st.Sys().(*syscall.Stat_t)
- atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec)) //nolint: unconvert
- if err := os.Chtimes(mountPoint, atime, st.ModTime()); err != nil {
- return err
- }
- } else if !os.IsNotExist(err) {
- return err
- }
- }
return nil
}
-func (c *Container) relabel(src, mountLabel string, recurse bool) error {
- if !selinux.GetEnabled() || mountLabel == "" {
- return nil
- }
- // only relabel on initial creation of container
- if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
- label, err := label.FileLabel(src)
- if err != nil {
- return err
- }
- // If labels are different, might be on a tmpfs
- if label == mountLabel {
- return nil
- }
- }
- return label.Relabel(src, mountLabel, recurse)
-}
-
-func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid int) error {
- // only chown on initial creation of container
- if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
- st, err := os.Stat(src)
+func (c *Container) makePlatformBindMounts() error {
+ // Make /etc/hostname
+ // This should never change, so no need to recreate if it exists
+ if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
+ hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
if err != nil {
- return err
- }
-
- // If labels are different, might be on a tmpfs
- if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
- return nil
+ return fmt.Errorf("creating hostname file for container %s: %w", c.ID(), err)
}
+ c.state.BindMounts["/etc/hostname"] = hostnamePath
}
- return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
+ return nil
}
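
The `setVolumeAtime` helper above pulls the access time out of the platform `Stat_t`. A standalone sketch of the same trick, assuming Linux field names (FreeBSD spells the field `Atimespec`); `copyAtime` and the paths are hypothetical, not libpod API:

```go
package main

import (
	"fmt"
	"os"
	"syscall"
	"time"
)

// copyAtime stamps dst with the access and modification times recorded in st.
func copyAtime(dst string, st os.FileInfo) error {
	stat, ok := st.Sys().(*syscall.Stat_t)
	if !ok {
		return fmt.Errorf("unexpected FileInfo.Sys type %T", st.Sys())
	}
	atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec))
	return os.Chtimes(dst, atime, st.ModTime())
}

func main() {
	st, err := os.Stat("/etc/hostname") // any existing file works here
	if err != nil {
		panic(err)
	}
	// /tmp/example is a hypothetical target that must already exist.
	fmt.Println(copyAtime("/tmp/example", st))
}
```
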
diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go
new file mode 100644
index 000000000..1967c577b
--- /dev/null
+++ b/libpod/container_internal_unsupported.go
@@ -0,0 +1,99 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "context"
+ "errors"
+
+ "github.com/containers/common/libnetwork/etchosts"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/lookup"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func (c *Container) mountSHM(shmOptions string) error {
+ return errors.New("not implemented (*Container) mountSHM")
+}
+
+func (c *Container) unmountSHM(mount string) error {
+ return errors.New("not implemented (*Container) unmountSHM")
+}
+
+func (c *Container) cleanupOverlayMounts() error {
+ return errors.New("not implemented (*Container) cleanupOverlayMounts")
+}
+
+// prepare mounts the container and sets up other required resources like net
+// namespaces
+func (c *Container) prepare() error {
+ return errors.New("not implemented (*Container) prepare")
+}
+
+// resolveWorkDir resolves the container's workdir and, depending on the
+// configuration, will create it, or error out if it does not exist.
+// Note that the container must be mounted before.
+func (c *Container) resolveWorkDir() error {
+ return errors.New("not implemented (*Container) resolveWorkDir")
+}
+
+// cleanupNetwork unmounts and cleans up the container's network
+func (c *Container) cleanupNetwork() error {
+ return errors.New("not implemented (*Container) cleanupNetwork")
+}
+
+// reloadNetwork reloads the network for the given container, recreating
+// firewall rules.
+func (c *Container) reloadNetwork() error {
+ return errors.New("not implemented (*Container) reloadNetwork")
+}
+
+// Generate spec for a container
+// Accepts a map of the container's dependencies
+func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
+ return nil, errors.New("not implemented (*Container) generateSpec")
+}
+
+func (c *Container) getUserOverrides() *lookup.Overrides {
+ return &lookup.Overrides{}
+}
+
+func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
+ return nil, 0, errors.New("not implemented (*Container) checkpoint")
+}
+
+func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
+ return nil, 0, errors.New("not implemented (*Container) restore")
+}
+
+// getHostsEntries returns the container ip host entries for the correct netmode
+func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
+ return nil, errors.New("unsupported (*Container) getHostsEntries")
+}
+
+// Fix ownership and permissions of the specified volume if necessary.
+func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
+ return errors.New("unsupported (*Container) fixVolumePermissions")
+}
+
+func (c *Container) expectPodCgroup() (bool, error) {
+ return false, errors.New("unsupported (*Container) expectPodCgroup")
+}
+
+// Get cgroup path in a format suitable for the OCI spec
+func (c *Container) getOCICgroupPath() (string, error) {
+ return "", errors.New("unsupported (*Container) getOCICgroupPath")
+}
+
+func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
+ return nil
+}
+
+func isRootlessCgroupSet(cgroup string) bool {
+ return false
+}
+
+func openDirectory(path string) (fd int, err error) {
+ return -1, errors.New("unsupported openDirectory")
+}
diff --git a/libpod/container_linux.go b/libpod/container_linux.go
index 8b517e69f..9c17a1966 100644
--- a/libpod/container_linux.go
+++ b/libpod/container_linux.go
@@ -5,6 +5,7 @@ package libpod
import (
"github.com/containernetworking/plugins/pkg/ns"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
)
type containerPlatformState struct {
@@ -13,3 +14,17 @@ type containerPlatformState struct {
// told to join another container's network namespace
NetNS ns.NetNS `json:"-"`
}
+
+func networkDisabled(c *Container) (bool, error) {
+ if c.config.CreateNetNS {
+ return false, nil
+ }
+ if !c.config.PostConfigureNetNS {
+ for _, ns := range c.config.Spec.Linux.Namespaces {
+ if ns.Type == spec.NetworkNamespace {
+ return ns.Path == "", nil
+ }
+ }
+ }
+ return false, nil
+}
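
`networkDisabled` above treats an OCI network namespace with an empty `Path` as net=none: the runtime would create a fresh, unconfigured namespace. A minimal standalone sketch of that rule (`specHasPrivateNetNS` is a hypothetical helper, not part of libpod):

```go
package main

import (
	"fmt"

	spec "github.com/opencontainers/runtime-spec/specs-go"
)

func specHasPrivateNetNS(s *spec.Spec) bool {
	if s.Linux == nil {
		return false
	}
	for _, ns := range s.Linux.Namespaces {
		if ns.Type == spec.NetworkNamespace {
			// an empty Path means a fresh, unconfigured namespace: net=none
			return ns.Path == ""
		}
	}
	return false
}

func main() {
	s := &spec.Spec{Linux: &spec.Linux{Namespaces: []spec.LinuxNamespace{
		{Type: spec.NetworkNamespace}, // Path left empty
	}}}
	fmt.Println(specHasPrivateNetNS(s)) // prints: true
}
```
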
diff --git a/libpod/container_path_resolution.go b/libpod/container_path_resolution.go
index 35622d623..cd86df540 100644
--- a/libpod/container_path_resolution.go
+++ b/libpod/container_path_resolution.go
@@ -119,15 +119,29 @@ func findVolume(c *Container, containerPath string) (*Volume, error) {
return nil, nil
}
+// isSubDir checks whether path is a subdirectory of root.
+func isSubDir(path, root string) bool {
+ // path is inside root iff its path relative to root does not escape upward.
+ rel, err := filepath.Rel(root, path)
+ if err != nil {
+ return false
+ }
+ return rel != ".." && !strings.HasPrefix(rel, "../")
+}
+
// isPathOnVolume returns true if the specified containerPath is a subdir of any
// Volume's destination.
func isPathOnVolume(c *Container, containerPath string) bool {
cleanedContainerPath := filepath.Clean(containerPath)
for _, vol := range c.config.NamedVolumes {
- if cleanedContainerPath == filepath.Clean(vol.Dest) {
+ cleanedDestination := filepath.Clean(vol.Dest)
+ if cleanedContainerPath == cleanedDestination {
return true
}
- for dest := vol.Dest; dest != "/" && dest != "."; dest = filepath.Dir(dest) {
+ if isSubDir(cleanedContainerPath, cleanedDestination) {
+ return true
+ }
+ for dest := cleanedDestination; dest != "/" && dest != "."; dest = filepath.Dir(dest) {
if cleanedContainerPath == dest {
return true
}
@@ -152,15 +166,19 @@ func findBindMount(c *Container, containerPath string) *specs.Mount {
return nil
}
-/// isPathOnBindMount returns true if the specified containerPath is a subdir of any
+// isPathOnMount returns true if the specified containerPath is a subdir of any
// Mount's destination.
-func isPathOnBindMount(c *Container, containerPath string) bool {
+func isPathOnMount(c *Container, containerPath string) bool {
cleanedContainerPath := filepath.Clean(containerPath)
for _, m := range c.config.Spec.Mounts {
- if cleanedContainerPath == filepath.Clean(m.Destination) {
+ cleanedDestination := filepath.Clean(m.Destination)
+ if cleanedContainerPath == cleanedDestination {
+ return true
+ }
+ if isSubDir(cleanedContainerPath, cleanedDestination) {
return true
}
- for dest := m.Destination; dest != "/" && dest != "."; dest = filepath.Dir(dest) {
+ for dest := cleanedDestination; dest != "/" && dest != "."; dest = filepath.Dir(dest) {
if cleanedContainerPath == dest {
return true
}
diff --git a/libpod/container_path_resolution_test.go b/libpod/container_path_resolution_test.go
new file mode 100644
index 000000000..f906c752d
--- /dev/null
+++ b/libpod/container_path_resolution_test.go
@@ -0,0 +1,28 @@
+package libpod
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestIsSubDir(t *testing.T) {
+ assert.True(t, isSubDir("/foo", "/foo"))
+ assert.True(t, isSubDir("/foo/bar", "/foo"))
+ assert.True(t, isSubDir("/foo/bar", "/foo/"))
+ assert.True(t, isSubDir("/foo/bar", "/foo//"))
+ assert.True(t, isSubDir("/foo/bar/", "/foo"))
+ assert.True(t, isSubDir("/foo/bar/baz/", "/foo"))
+ assert.True(t, isSubDir("/foo/bar/baz/", "/foo/bar"))
+ assert.True(t, isSubDir("/foo/bar/baz/", "/foo/bar/"))
+ assert.False(t, isSubDir("/foo/bar/baz/", "/foobar/"))
+ assert.False(t, isSubDir("/foo/bar/baz/../../", "/foobar/"))
+ assert.False(t, isSubDir("/foo/bar/baz/", "../foo/bar"))
+ assert.False(t, isSubDir("/foo/bar/baz/", "../foo/"))
+ assert.False(t, isSubDir("/foo/bar/baz/", "../foo"))
+ assert.False(t, isSubDir("/", ".."))
+ assert.False(t, isSubDir("//", ".."))
+ assert.False(t, isSubDir("//", "../"))
+ assert.False(t, isSubDir("//", "..//"))
+ assert.True(t, isSubDir("/foo/bar/baz/../../", "/foo/"))
+}
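
The `filepath.Rel`-based check in `isSubDir` avoids the classic prefix-matching bug where `/foobar` is mistaken for a child of `/foo`. A self-contained sketch contrasting the two approaches (`naiveIsSubDir` is a deliberately buggy stand-in; `relIsSubDir` mirrors the logic added in this diff):

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func naiveIsSubDir(path, root string) bool {
	return strings.HasPrefix(path, root) // buggy: "/foobar" matches "/foo"
}

func relIsSubDir(path, root string) bool {
	rel, err := filepath.Rel(root, path)
	if err != nil {
		return false
	}
	// inside root iff the relative path does not escape upward
	return rel != ".." && !strings.HasPrefix(rel, "../")
}

func main() {
	fmt.Println(naiveIsSubDir("/foobar", "/foo")) // true (wrong)
	fmt.Println(relIsSubDir("/foobar", "/foo"))   // false (correct)
	fmt.Println(relIsSubDir("/foo/bar", "/foo"))  // true
}
```
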
diff --git a/libpod/container_stat_linux.go b/libpod/container_stat_linux.go
index 72aabb516..dc3a524f5 100644
--- a/libpod/container_stat_linux.go
+++ b/libpod/container_stat_linux.go
@@ -168,7 +168,7 @@ func secureStat(root string, path string) (*copier.StatForItem, error) {
if stat.IsSymlink {
target, err := copier.Eval(root, path, copier.EvalOptions{})
if err != nil {
- return nil, fmt.Errorf("error evaluating symlink in container: %w", err)
+ return nil, fmt.Errorf("evaluating symlink in container: %w", err)
}
// Need to make sure the symlink is relative to the root!
target = strings.TrimPrefix(target, root)
diff --git a/libpod/container_stat_unsupported.go b/libpod/container_stat_unsupported.go
new file mode 100644
index 000000000..2f1acd44d
--- /dev/null
+++ b/libpod/container_stat_unsupported.go
@@ -0,0 +1,14 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+func (c *Container) stat(containerMountPoint string, containerPath string) (*define.FileInfo, string, string, error) {
+ return nil, "", "", errors.New("Containers stat not supported on this platform")
+}
diff --git a/libpod/container_top_linux.go b/libpod/container_top_linux.go
index 5571edf73..b9916c3a3 100644
--- a/libpod/container_top_linux.go
+++ b/libpod/container_top_linux.go
@@ -70,7 +70,7 @@ func (c *Container) Top(descriptors []string) ([]string, error) {
output, err = c.execPS(psDescriptors)
if err != nil {
- return nil, fmt.Errorf("error executing ps(1) in the container: %w", err)
+ return nil, fmt.Errorf("executing ps(1) in the container: %w", err)
}
// Trick: filter the ps command from the output instead of
diff --git a/libpod/container_top_unsupported.go b/libpod/container_top_unsupported.go
new file mode 100644
index 000000000..a8d9b970b
--- /dev/null
+++ b/libpod/container_top_unsupported.go
@@ -0,0 +1,14 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+)
+
+// Top gathers statistics about the running processes in a container. It returns a
+// []string for output
+func (c *Container) Top(descriptors []string) ([]string, error) {
+ return nil, errors.New("not implemented (*Container) Top")
+}
diff --git a/libpod/container_unsupported.go b/libpod/container_unsupported.go
new file mode 100644
index 000000000..16bf11622
--- /dev/null
+++ b/libpod/container_unsupported.go
@@ -0,0 +1,7 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+type containerPlatformState struct {
+}
diff --git a/libpod/container_validate.go b/libpod/container_validate.go
index e280c60d2..f4611ecce 100644
--- a/libpod/container_validate.go
+++ b/libpod/container_validate.go
@@ -133,5 +133,13 @@ func (c *Container) validate() error {
if len(c.config.InitContainerType) > 0 && len(c.config.Pod) < 1 {
return fmt.Errorf("init containers must be created in a pod: %w", define.ErrInvalidArg)
}
+
+ if c.config.SdNotifyMode == define.SdNotifyModeIgnore && len(c.config.SdNotifySocket) > 0 {
+ return fmt.Errorf("cannot set sd-notify socket %q with sd-notify mode %q", c.config.SdNotifySocket, c.config.SdNotifyMode)
+ }
+
+ if c.config.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone && c.config.HealthCheckConfig == nil {
+ return fmt.Errorf("cannot set on-failure action to %s without a health check", c.config.HealthCheckOnFailureAction.String())
+ }
return nil
}
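
Both new checks follow the usual shape of validate(): reject contradictory settings at create time rather than failing later at start. A toy illustration of the sd-notify rule with stand-in types (`ctrConfig` and `validateSdNotify` are not libpod names):

```go
package main

import "fmt"

// ctrConfig is a stand-in for the container config fields checked above.
type ctrConfig struct {
	SdNotifyMode   string
	SdNotifySocket string
}

func validateSdNotify(c ctrConfig) error {
	// a NOTIFY_SOCKET is meaningless when sd-notify handling is ignored
	if c.SdNotifyMode == "ignore" && len(c.SdNotifySocket) > 0 {
		return fmt.Errorf("cannot set sd-notify socket %q with sd-notify mode %q", c.SdNotifySocket, c.SdNotifyMode)
	}
	return nil
}

func main() {
	fmt.Println(validateSdNotify(ctrConfig{SdNotifyMode: "ignore", SdNotifySocket: "/run/notify.sock"})) // error
	fmt.Println(validateSdNotify(ctrConfig{SdNotifyMode: "container", SdNotifySocket: "/run/notify.sock"})) // <nil>
}
```
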
diff --git a/libpod/define/config.go b/libpod/define/config.go
index 0181bd31c..1fad5cc9a 100644
--- a/libpod/define/config.go
+++ b/libpod/define/config.go
@@ -81,15 +81,8 @@ const NoLogging = "none"
// PassthroughLogging is the string conmon expects when specifying to use the passthrough driver
const PassthroughLogging = "passthrough"
-// Strings used for --sdnotify option to podman
-const (
- SdNotifyModeContainer = "container"
- SdNotifyModeConmon = "conmon"
- SdNotifyModeIgnore = "ignore"
-)
-
// DefaultRlimitValue is the value set by default for nofile and nproc
const RLimitDefaultValue = uint64(1048576)
// BindMountPrefix distinguishes its annotations from others
-const BindMountPrefix = "bind-mount-options:"
+const BindMountPrefix = "bind-mount-options"
diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go
index e6a34ba61..da5c58f27 100644
--- a/libpod/define/container_inspect.go
+++ b/libpod/define/container_inspect.go
@@ -55,6 +55,8 @@ type InspectContainerConfig struct {
StopSignal uint `json:"StopSignal"`
// Configured healthcheck for the container
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
+ // HealthcheckOnFailureAction defines an action to take once the container turns unhealthy.
+ HealthcheckOnFailureAction string `json:"HealthcheckOnFailureAction,omitempty"`
// CreateCommand is the full command plus arguments of the process the
// container has been created with.
CreateCommand []string `json:"CreateCommand,omitempty"`
@@ -79,6 +81,10 @@ type InspectContainerConfig struct {
// treated as root directories. Standard bind mounts will be mounted
// into paths relative to these directories.
ChrootDirs []string `json:"ChrootDirs,omitempty"`
+ // SdNotifyMode is the sd-notify mode of the container.
+ SdNotifyMode string `json:"sdNotifyMode,omitempty"`
+ // SdNotifySocket is the NOTIFY_SOCKET in use by/configured for the container.
+ SdNotifySocket string `json:"sdNotifySocket,omitempty"`
}
// InspectRestartPolicy holds information about the container's restart policy.
diff --git a/libpod/define/errors.go b/libpod/define/errors.go
index fd27e89de..be471c27e 100644
--- a/libpod/define/errors.go
+++ b/libpod/define/errors.go
@@ -179,6 +179,9 @@ var (
// ErrNetworkInUse indicates the requested operation failed because the network was in use
ErrNetworkInUse = errors.New("network is being used")
+ // ErrNetworkConnected indicates that the required operation failed because the container is already a network endpoint
+ ErrNetworkConnected = errors.New("network is already connected")
+
// ErrStoreNotInitialized indicates that the container storage was never
// initialized.
ErrStoreNotInitialized = errors.New("the container storage was never initialized")
diff --git a/libpod/define/exec_codes.go b/libpod/define/exec_codes.go
index 3f2da4910..a84730e72 100644
--- a/libpod/define/exec_codes.go
+++ b/libpod/define/exec_codes.go
@@ -11,8 +11,8 @@ const (
// ExecErrorCodeGeneric is the default error code to return from an exec session if libpod failed
// prior to calling the runtime
ExecErrorCodeGeneric = 125
- // ExecErrorCodeCannotInvoke is the error code to return when the runtime fails to invoke a command
- // an example of this can be found by trying to execute a directory:
+ // ExecErrorCodeCannotInvoke is the error code to return when the runtime fails to invoke a command.
+ // An example of this can be found by trying to execute a directory:
// `podman exec -l /etc`
ExecErrorCodeCannotInvoke = 126
// ExecErrorCodeNotFound is the error code to return when a command cannot be found
diff --git a/libpod/define/healthchecks.go b/libpod/define/healthchecks.go
index f71274350..274e02561 100644
--- a/libpod/define/healthchecks.go
+++ b/libpod/define/healthchecks.go
@@ -1,5 +1,10 @@
package define
+import (
+ "fmt"
+ "strings"
+)
+
const (
// HealthCheckHealthy describes a healthy container
HealthCheckHealthy string = "healthy"
@@ -57,3 +62,72 @@ const (
// HealthConfigTestCmdShell runs commands with the system's default shell
HealthConfigTestCmdShell = "CMD-SHELL"
)
+
+// HealthCheckOnFailureAction defines how Podman reacts when a container's health
+// status turns unhealthy.
+type HealthCheckOnFailureAction int
+
+// Healthcheck on-failure actions.
+const (
+ // HealthCheckOnFailureActionNone instructs Podman not to react to an unhealthy status.
+ HealthCheckOnFailureActionNone = iota // Must be first iota for backwards compatibility
+ // HealthCheckOnFailureActionInvalid denotes an invalid on-failure policy.
+ HealthCheckOnFailureActionInvalid = iota
+ // HealthCheckOnFailureActionKill instructs Podman to kill the container on an unhealthy status.
+ HealthCheckOnFailureActionKill = iota
+ // HealthCheckOnFailureActionRestart instructs Podman to restart the container on an unhealthy status.
+ HealthCheckOnFailureActionRestart = iota
+ // HealthCheckOnFailureActionStop instructs Podman to stop the container on an unhealthy status.
+ HealthCheckOnFailureActionStop = iota
+)
+
+// String representations for on-failure actions.
+const (
+ strHealthCheckOnFailureActionNone = "none"
+ strHealthCheckOnFailureActionInvalid = "invalid"
+ strHealthCheckOnFailureActionKill = "kill"
+ strHealthCheckOnFailureActionRestart = "restart"
+ strHealthCheckOnFailureActionStop = "stop"
+)
+
+// SupportedHealthCheckOnFailureActions lists all supported healthcheck on-failure actions.
+var SupportedHealthCheckOnFailureActions = []string{
+ strHealthCheckOnFailureActionNone,
+ strHealthCheckOnFailureActionKill,
+ strHealthCheckOnFailureActionRestart,
+ strHealthCheckOnFailureActionStop,
+}
+
+// String returns the string representation of the HealthCheckOnFailureAction.
+func (h HealthCheckOnFailureAction) String() string {
+ switch h {
+ case HealthCheckOnFailureActionNone:
+ return strHealthCheckOnFailureActionNone
+ case HealthCheckOnFailureActionKill:
+ return strHealthCheckOnFailureActionKill
+ case HealthCheckOnFailureActionRestart:
+ return strHealthCheckOnFailureActionRestart
+ case HealthCheckOnFailureActionStop:
+ return strHealthCheckOnFailureActionStop
+ default:
+ return strHealthCheckOnFailureActionInvalid
+ }
+}
+
+// ParseHealthCheckOnFailureAction parses the specified string into a HealthCheckOnFailureAction.
+// An error is returned for an invalid input.
+func ParseHealthCheckOnFailureAction(s string) (HealthCheckOnFailureAction, error) {
+ switch s {
+ case "", strHealthCheckOnFailureActionNone:
+ return HealthCheckOnFailureActionNone, nil
+ case strHealthCheckOnFailureActionKill:
+ return HealthCheckOnFailureActionKill, nil
+ case strHealthCheckOnFailureActionRestart:
+ return HealthCheckOnFailureActionRestart, nil
+ case strHealthCheckOnFailureActionStop:
+ return HealthCheckOnFailureActionStop, nil
+ default:
+ err := fmt.Errorf("invalid on-failure action %q for health check: supported actions are %s", s, strings.Join(SupportedHealthCheckOnFailureActions, ","))
+ return HealthCheckOnFailureActionInvalid, err
+ }
+}
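
Assuming the exported names above land in `libpod/define` as shown, a short round-trip sketch of the parser and its String method:

```go
package main

import (
	"fmt"

	"github.com/containers/podman/v4/libpod/define"
)

func main() {
	action, err := define.ParseHealthCheckOnFailureAction("restart")
	if err != nil {
		panic(err)
	}
	fmt.Println(action.String()) // prints: restart

	// invalid input yields the Invalid action plus a descriptive error
	if _, err := define.ParseHealthCheckOnFailureAction("reboot"); err != nil {
		fmt.Println(err)
	}
}
```
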
diff --git a/libpod/define/mount.go b/libpod/define/mount.go
index 1b0d019c8..db444fd83 100644
--- a/libpod/define/mount.go
+++ b/libpod/define/mount.go
@@ -1,8 +1,6 @@
package define
const (
- // TypeBind is the type for mounting host dir
- TypeBind = "bind"
// TypeVolume is the type for named volumes
TypeVolume = "volume"
// TypeTmpfs is the type for mounting tmpfs
diff --git a/libpod/define/mount_freebsd.go b/libpod/define/mount_freebsd.go
new file mode 100644
index 000000000..e080c9ec6
--- /dev/null
+++ b/libpod/define/mount_freebsd.go
@@ -0,0 +1,8 @@
+//go:build freebsd
+
+package define
+
+const (
+ // TypeBind is the type for mounting host dir
+ TypeBind = "nullfs"
+)
diff --git a/libpod/define/mount_linux.go b/libpod/define/mount_linux.go
new file mode 100644
index 000000000..5ef848905
--- /dev/null
+++ b/libpod/define/mount_linux.go
@@ -0,0 +1,8 @@
+//go:build linux
+
+package define
+
+const (
+ // TypeBind is the type for mounting host dir
+ TypeBind = "bind"
+)
diff --git a/libpod/define/mount_unsupported.go b/libpod/define/mount_unsupported.go
new file mode 100644
index 000000000..cb8642fe2
--- /dev/null
+++ b/libpod/define/mount_unsupported.go
@@ -0,0 +1,8 @@
+//go:build !linux && !freebsd
+
+package define
+
+const (
+ // TypeBind is the type for mounting host dir
+ TypeBind = "bind"
+)
diff --git a/libpod/define/sdnotify.go b/libpod/define/sdnotify.go
new file mode 100644
index 000000000..1d548c764
--- /dev/null
+++ b/libpod/define/sdnotify.go
@@ -0,0 +1,20 @@
+package define
+
+import "fmt"
+
+// Strings used for --sdnotify option to podman
+const (
+ SdNotifyModeContainer = "container"
+ SdNotifyModeConmon = "conmon"
+ SdNotifyModeIgnore = "ignore"
+)
+
+// ValidateSdNotifyMode validates the specified mode.
+func ValidateSdNotifyMode(mode string) error {
+ switch mode {
+ case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore:
+ return nil
+ default:
+ return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore)
+ }
+}
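
A quick usage sketch of the validator above; note that the empty string is accepted, so an unset mode passes validation:

```go
package main

import (
	"fmt"

	"github.com/containers/podman/v4/libpod/define"
)

func main() {
	for _, mode := range []string{"", "container", "conmon", "ignore", "bogus"} {
		err := define.ValidateSdNotifyMode(mode)
		fmt.Printf("%-10q -> %v\n", mode, err)
	}
}
```
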
diff --git a/libpod/define/volume_inspect.go b/libpod/define/volume_inspect.go
index 9279812da..76120647c 100644
--- a/libpod/define/volume_inspect.go
+++ b/libpod/define/volume_inspect.go
@@ -57,7 +57,7 @@ type InspectVolumeData struct {
// UID/GID.
NeedsChown bool `json:"NeedsChown,omitempty"`
// Timeout is the specified driver timeout if given
- Timeout int `json:"Timeout,omitempty"`
+ Timeout uint `json:"Timeout,omitempty"`
}
type VolumeReload struct {
diff --git a/libpod/events.go b/libpod/events.go
index c9e4c9d26..2f9799114 100644
--- a/libpod/events.go
+++ b/libpod/events.go
@@ -3,6 +3,7 @@ package libpod
import (
"context"
"fmt"
+ "path/filepath"
"sync"
"github.com/containers/podman/v4/libpod/events"
@@ -11,6 +12,10 @@ import (
// newEventer returns an eventer that can be used to read/write events
func (r *Runtime) newEventer() (events.Eventer, error) {
+ if r.config.Engine.EventsLogFilePath == "" {
+ // Default to a path under the tmpdir when the user did not set one explicitly.
+ r.config.Engine.EventsLogFilePath = filepath.Join(r.config.Engine.TmpDir, "events", "events.log")
+ }
options := events.EventerOptions{
EventerType: r.config.Engine.EventsLogger,
LogFilePath: r.config.Engine.EventsLogFilePath,
@@ -55,6 +60,12 @@ func (c *Container) newContainerExitedEvent(exitCode int32) {
e.Image = c.config.RootfsImageName
e.Type = events.Container
e.ContainerExitCode = int(exitCode)
+
+ e.Details = events.Details{
+ ID: e.ID,
+ Attributes: c.Labels(),
+ }
+
if err := c.runtime.eventer.Write(e); err != nil {
logrus.Errorf("Unable to write container exited event: %q", err)
}
@@ -70,6 +81,12 @@ func (c *Container) newExecDiedEvent(sessionID string, exitCode int) {
e.ContainerExitCode = exitCode
e.Attributes = make(map[string]string)
e.Attributes["execID"] = sessionID
+
+ e.Details = events.Details{
+ ID: e.ID,
+ Attributes: c.Labels(),
+ }
+
if err := c.runtime.eventer.Write(e); err != nil {
logrus.Errorf("Unable to write exec died event: %q", err)
}
@@ -121,11 +138,7 @@ func (v *Volume) newVolumeEvent(status events.Status) {
// Events is a wrapper function for everyone to begin tailing the events log
// with options
func (r *Runtime) Events(ctx context.Context, options events.ReadOptions) error {
- eventer, err := r.newEventer()
- if err != nil {
- return err
- }
- return eventer.Read(ctx, options)
+ return r.eventer.Read(ctx, options)
}
// GetEvents reads the event log and returns events based on input filters
@@ -137,10 +150,6 @@ func (r *Runtime) GetEvents(ctx context.Context, filters []string) ([]*events.Ev
FromStart: true,
Stream: false,
}
- eventer, err := r.newEventer()
- if err != nil {
- return nil, err
- }
logEvents := make([]*events.Event, 0, len(eventChannel))
readLock := sync.Mutex{}
@@ -152,7 +161,7 @@ func (r *Runtime) GetEvents(ctx context.Context, filters []string) ([]*events.Ev
readLock.Unlock()
}()
- readErr := eventer.Read(ctx, options)
+ readErr := r.eventer.Read(ctx, options)
readLock.Lock() // Wait for the events to be consumed.
return logEvents, readErr
}
diff --git a/libpod/events/events_freebsd.go b/libpod/events/events_freebsd.go
new file mode 100644
index 000000000..90933fa2c
--- /dev/null
+++ b/libpod/events/events_freebsd.go
@@ -0,0 +1,23 @@
+package events
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/sirupsen/logrus"
+)
+
+// NewEventer creates an eventer based on the eventer type
+func NewEventer(options EventerOptions) (Eventer, error) {
+ logrus.Debugf("Initializing event backend %s", options.EventerType)
+ switch strings.ToUpper(options.EventerType) {
+ case strings.ToUpper(LogFile.String()):
+ return EventLogFile{options}, nil
+ case strings.ToUpper(Null.String()):
+ return newNullEventer(), nil
+ case strings.ToUpper(Memory.String()):
+ return NewMemoryEventer(), nil
+ default:
+ return nil, fmt.Errorf("unknown event logger type: %s", strings.ToUpper(options.EventerType))
+ }
+}
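
A hedged construction sketch for the factory above: `EventerOptions` and `NewEventer` are from this diff, "file" is assumed to match `LogFile.String()`, and the log path is invented for the example. Journald is deliberately absent on FreeBSD, leaving logfile, null, and memory as the only backends:

```go
package main

import (
	"fmt"

	"github.com/containers/podman/v4/libpod/events"
)

func main() {
	eventer, err := events.NewEventer(events.EventerOptions{
		EventerType: "file",                       // assumed to match LogFile.String()
		LogFilePath: "/var/tmp/podman-events.log", // hypothetical path
	})
	if err != nil {
		panic(err)
	}
	fmt.Printf("selected backend: %T\n", eventer)
}
```
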
diff --git a/libpod/events/events_linux.go b/libpod/events/events_linux.go
index e7801af5b..66b125dd5 100644
--- a/libpod/events/events_linux.go
+++ b/libpod/events/events_linux.go
@@ -18,9 +18,9 @@ func NewEventer(options EventerOptions) (Eventer, error) {
}
return eventer, nil
case strings.ToUpper(LogFile.String()):
- return EventLogFile{options}, nil
+ return newLogFileEventer(options)
case strings.ToUpper(Null.String()):
- return NewNullEventer(), nil
+ return newNullEventer(), nil
case strings.ToUpper(Memory.String()):
return NewMemoryEventer(), nil
default:
diff --git a/libpod/events/events_unsupported.go b/libpod/events/events_unsupported.go
index d766402a9..01031c225 100644
--- a/libpod/events/events_unsupported.go
+++ b/libpod/events/events_unsupported.go
@@ -1,5 +1,5 @@
-//go:build !linux
-// +build !linux
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
package events
diff --git a/libpod/events/journal_linux.go b/libpod/events/journal_linux.go
index 16ef6504f..4986502a2 100644
--- a/libpod/events/journal_linux.go
+++ b/libpod/events/journal_linux.go
@@ -112,57 +112,16 @@ func (e EventJournalD) Read(ctx context.Context, options ReadOptions) error {
}
}
- // the api requires a next|prev before getting a cursor
- if _, err := j.Next(); err != nil {
- return fmt.Errorf("failed to move journal cursor to next entry: %w", err)
- }
-
- prevCursor, err := j.GetCursor()
- if err != nil {
- return fmt.Errorf("failed to get journal cursor: %w", err)
- }
for {
- select {
- case <-ctx.Done():
- // the consumer has cancelled
- return nil
- default:
- // fallthrough
- }
-
- if _, err := j.Next(); err != nil {
- return fmt.Errorf("failed to move journal cursor to next entry: %w", err)
- }
- newCursor, err := j.GetCursor()
+ entry, err := getNextEntry(ctx, j, options.Stream, untilTime)
if err != nil {
- return fmt.Errorf("failed to get journal cursor: %w", err)
+ return err
}
- if prevCursor == newCursor {
- if !options.Stream || (len(options.Until) > 0 && time.Now().After(untilTime)) {
- break
- }
-
- // j.Wait() is blocking, this would cause the goroutine to hang forever
- // if no more journal entries are generated and thus if the client
- // has closed the connection in the meantime to leak memory.
- // Waiting only 5 seconds makes sure we can check if the client closed in the
- // meantime at least every 5 seconds.
- t := 5 * time.Second
- if len(options.Until) > 0 {
- until := time.Until(untilTime)
- if until < t {
- t = until
- }
- }
- _ = j.Wait(t)
- continue
+ // no entry == we hit the end
+ if entry == nil {
+ return nil
}
- prevCursor = newCursor
- entry, err := j.GetEntry()
- if err != nil {
- return fmt.Errorf("failed to read journal entry: %w", err)
- }
newEvent, err := newEventFromJournalEntry(entry)
if err != nil {
// We can't decode this event.
@@ -177,7 +136,6 @@ func (e EventJournalD) Read(ctx context.Context, options ReadOptions) error {
options.EventChannel <- newEvent
}
}
- return nil
}
func newEventFromJournalEntry(entry *sdjournal.JournalEntry) (*Event, error) {
@@ -238,3 +196,51 @@ func newEventFromJournalEntry(entry *sdjournal.JournalEntry) (*Event, error) {
func (e EventJournalD) String() string {
return Journald.String()
}
+
+// getNextEntry returns the next entry in the journal. If the end of the
+// journal is reached and stream is not set, or the current time is after
+// the until time, this function returns nil, nil.
+func getNextEntry(ctx context.Context, j *sdjournal.Journal, stream bool, untilTime time.Time) (*sdjournal.JournalEntry, error) {
+ for {
+ select {
+ case <-ctx.Done():
+ // the consumer has cancelled
+ return nil, nil
+ default:
+ // fallthrough
+ }
+ // the API requires a next|prev call before reading the entry
+ ret, err := j.Next()
+ if err != nil {
+ return nil, fmt.Errorf("failed to move journal cursor to next entry: %w", err)
+ }
+ // ret == 0 equals EOF, see sd_journal_next(3)
+ if ret == 0 {
+ if !stream || (!untilTime.IsZero() && time.Now().After(untilTime)) {
+ // we hit the end and should not keep streaming
+ return nil, nil
+ }
+ // keep waiting for the next entry
+ // j.Wait() is blocking; if no more journal entries are generated, the
+ // goroutine would hang forever and leak memory once the client has
+ // closed the connection in the meantime.
+ // Waiting only 5 seconds at a time makes sure we notice a closed
+ // client at least every 5 seconds.
+ t := 5 * time.Second
+ if !untilTime.IsZero() {
+ until := time.Until(untilTime)
+ if until < t {
+ t = until
+ }
+ }
+ _ = j.Wait(t)
+ continue
+ }
+
+ entry, err := j.GetEntry()
+ if err != nil {
+ return nil, fmt.Errorf("failed to read journal entry: %w", err)
+ }
+ return entry, nil
+ }
+}
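
With `getNextEntry` doing all the cursor work, `Read` reduces to decode-and-forward, and nothing changes from a consumer's perspective. A hedged sketch of streaming events with a cancellable context (`ReadOptions` fields as used elsewhere in this diff; the backend type and path are assumptions for the example):

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/containers/podman/v4/libpod/events"
)

func main() {
	eventer, err := events.NewEventer(events.EventerOptions{
		EventerType: "file",
		LogFilePath: "/var/tmp/podman-events.log", // hypothetical path
	})
	if err != nil {
		panic(err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	ch := make(chan *events.Event)
	go func() {
		// when streaming, Read blocks until ctx is done or an error occurs
		_ = eventer.Read(ctx, events.ReadOptions{
			EventChannel: ch,
			FromStart:    true,
			Stream:       true,
		})
	}()

	for {
		select {
		case ev := <-ch:
			if ev != nil {
				fmt.Println(ev.Type, ev.Status)
			}
		case <-ctx.Done():
			return
		}
	}
}
```
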
diff --git a/libpod/events/logfile.go b/libpod/events/logfile.go
index c7dbf4850..d749a0d4d 100644
--- a/libpod/events/logfile.go
+++ b/libpod/events/logfile.go
@@ -1,5 +1,5 @@
-//go:build linux
-// +build linux
+//go:build linux || freebsd
+// +build linux freebsd
package events
@@ -12,6 +12,7 @@ import (
"io/ioutil"
"os"
"path"
+ "path/filepath"
"time"
"github.com/containers/podman/v4/pkg/util"
@@ -27,6 +28,21 @@ type EventLogFile struct {
options EventerOptions
}
+// newLogFileEventer creates a new EventLogFile eventer
+func newLogFileEventer(options EventerOptions) (*EventLogFile, error) {
+ // Create events log dir
+ if err := os.MkdirAll(filepath.Dir(options.LogFilePath), 0700); err != nil {
+ return nil, fmt.Errorf("creating events dirs: %w", err)
+ }
+ // We have to make sure the file is created otherwise reading events will hang.
+ // https://github.com/containers/podman/issues/15688
+ fd, err := os.OpenFile(options.LogFilePath, os.O_RDONLY|os.O_CREATE, 0700)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create event log file: %w", err)
+ }
+ return &EventLogFile{options: options}, fd.Close()
+}
+
// Writes to the log file
func (e EventLogFile) Write(ee Event) error {
// We need to lock events file
@@ -108,6 +124,8 @@ func (e EventLogFile) Read(ctx context.Context, options ReadOptions) error {
}
}()
}
+ logrus.Debugf("Reading events from file %q", e.options.LogFilePath)
+
var line *tail.Line
var ok bool
for {
diff --git a/libpod/events/nullout.go b/libpod/events/nullout.go
index 587a1b98b..da3820c23 100644
--- a/libpod/events/nullout.go
+++ b/libpod/events/nullout.go
@@ -2,10 +2,11 @@ package events
import (
"context"
+ "errors"
)
-// EventToNull is an eventer type that only performs write operations
-// and only writes to /dev/null. It is meant for unittests only
+// EventToNull is an eventer type that does nothing.
+// It is meant for unit tests only.
type EventToNull struct{}
// Write eats the event and always returns nil
@@ -13,14 +14,14 @@ func (e EventToNull) Write(ee Event) error {
return nil
}
-// Read does nothing. Do not use it.
+// Read does nothing and returns an error.
func (e EventToNull) Read(ctx context.Context, options ReadOptions) error {
- return nil
+ return errors.New("cannot read events with the \"none\" backend")
}
-// NewNullEventer returns a new null eventer. You should only do this for
+// newNullEventer returns a new null eventer. You should only do this for
// the purposes of internal libpod testing.
-func NewNullEventer() Eventer {
+func newNullEventer() Eventer {
return EventToNull{}
}
diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go
index 9b9d12b17..e835af9f0 100644
--- a/libpod/healthcheck.go
+++ b/libpod/healthcheck.go
@@ -2,6 +2,7 @@ package libpod
import (
"bufio"
+ "context"
"errors"
"fmt"
"io/ioutil"
@@ -12,6 +13,7 @@ import (
"github.com/containers/podman/v4/libpod/define"
"github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
)
const (
@@ -29,9 +31,14 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
if err != nil {
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
}
+
hcStatus, err := checkHealthCheckCanBeRun(container)
if err == nil {
- return container.runHealthCheck()
+ hcStatus, err := container.runHealthCheck()
+ if err := container.processHealthCheckStatus(hcStatus); err != nil {
+ return hcStatus, err
+ }
+ return hcStatus, err
}
return hcStatus, err
}
@@ -127,13 +134,45 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
hcResult = define.HealthCheckFailure
hcErr = fmt.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String())
}
+
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
}
+
return hcResult, hcErr
}
+func (c *Container) processHealthCheckStatus(status define.HealthCheckStatus) error {
+ if status == define.HealthCheckSuccess {
+ return nil
+ }
+
+ switch c.config.HealthCheckOnFailureAction {
+ case define.HealthCheckOnFailureActionNone: // Nothing to do
+
+ case define.HealthCheckOnFailureActionKill:
+ if err := c.Kill(uint(unix.SIGKILL)); err != nil {
+ return fmt.Errorf("killing container health-check turned unhealthy: %w", err)
+ }
+
+ case define.HealthCheckOnFailureActionRestart:
+ if err := c.RestartWithTimeout(context.Background(), c.config.StopTimeout); err != nil {
+ return fmt.Errorf("restarting container after health-check turned unhealthy: %w", err)
+ }
+
+ case define.HealthCheckOnFailureActionStop:
+ if err := c.Stop(); err != nil {
+ return fmt.Errorf("stopping container after health-check turned unhealthy: %w", err)
+ }
+
+ default: // Should not happen but better be safe than sorry
+ return fmt.Errorf("unsupported on-failure action %d", c.config.HealthCheckOnFailureAction)
+ }
+
+ return nil
+}
+
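
A hedged sketch of how a caller sees the new behavior; the `runtime` variable (a *Runtime) and the container name are assumptions for illustration:

    // status reflects the check result even when the follow-up action fails.
    status, err := runtime.HealthCheck("mycontainer")
    if err != nil {
        // err may come from the health check itself or from the
        // on-failure action (kill/restart/stop) applied afterwards
        logrus.Errorf("Health check: %v", err)
    }
    if status == define.HealthCheckFailure {
        // with HealthCheckOnFailureActionRestart the container has already
        // been restarted by processHealthCheckStatus at this point
        logrus.Info("Container unhealthy; on-failure action already applied")
    }
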
func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
cstate, err := c.State()
if err != nil {
diff --git a/libpod/healthcheck_linux.go b/libpod/healthcheck_linux.go
index 3fb6dfb91..6948f976a 100644
--- a/libpod/healthcheck_linux.go
+++ b/libpod/healthcheck_linux.go
@@ -70,7 +70,7 @@ func (c *Container) startTimer() error {
startFile := fmt.Sprintf("%s.service", c.ID())
startChan := make(chan string)
- if _, err := conn.StartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
+ if _, err := conn.RestartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
return err
}
if err := systemdOpSuccessful(startChan); err != nil {
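
The switch from StartUnitContext to RestartUnitContext makes the call work whether or not the transient unit is already running, since systemd starts a stopped unit on restart. A minimal sketch of the underlying call, assuming github.com/coreos/go-systemd/v22/dbus with a made-up unit name and trimmed error handling:

    conn, err := dbus.NewSystemdConnectionContext(context.Background())
    if err != nil {
        return err
    }
    defer conn.Close()
    ch := make(chan string)
    // "fail" makes the job fail instead of queueing behind conflicting jobs
    if _, err := conn.RestartUnitContext(context.Background(), "abc123.service", "fail", ch); err != nil {
        return err
    }
    if res := <-ch; res != "done" {
        return fmt.Errorf("restarting healthcheck unit: %s", res)
    }
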
diff --git a/libpod/healthcheck_unsupported.go b/libpod/healthcheck_unsupported.go
new file mode 100644
index 000000000..92cd5d0a3
--- /dev/null
+++ b/libpod/healthcheck_unsupported.go
@@ -0,0 +1,25 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "context"
+ "errors"
+)
+
+// createTimer creates systemd timers for healthchecks of a container
+func (c *Container) createTimer() error {
+ return errors.New("not implemented (*Container) createTimer")
+}
+
+// startTimer starts a systemd timer for the healthchecks
+func (c *Container) startTimer() error {
+ return errors.New("not implemented (*Container) startTimer")
+}
+
+// removeTransientFiles removes the systemd timer and unit files
+// for the container
+func (c *Container) removeTransientFiles(ctx context.Context) error {
+ return errors.New("not implemented (*Container) removeTransientFiles")
+}
diff --git a/libpod/info.go b/libpod/info.go
index c4193b40d..ad8f65432 100644
--- a/libpod/info.go
+++ b/libpod/info.go
@@ -5,27 +5,21 @@ import (
"bytes"
"errors"
"fmt"
- "io/ioutil"
"math"
"os"
- "os/exec"
"runtime"
- "strconv"
"strings"
"syscall"
"time"
"github.com/containers/buildah"
- "github.com/containers/common/pkg/apparmor"
- "github.com/containers/common/pkg/cgroups"
- "github.com/containers/common/pkg/seccomp"
+ "github.com/containers/buildah/pkg/util"
"github.com/containers/image/v5/pkg/sysregistriesv2"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/linkmode"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/storage"
"github.com/containers/storage/pkg/system"
- "github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus"
)
@@ -34,20 +28,20 @@ func (r *Runtime) info() (*define.Info, error) {
info := define.Info{}
versionInfo, err := define.GetVersion()
if err != nil {
- return nil, fmt.Errorf("error getting version info: %w", err)
+ return nil, fmt.Errorf("getting version info: %w", err)
}
info.Version = versionInfo
// get host information
hostInfo, err := r.hostInfo()
if err != nil {
- return nil, fmt.Errorf("error getting host info: %w", err)
+ return nil, fmt.Errorf("getting host info: %w", err)
}
info.Host = hostInfo
// get store information
storeInfo, err := r.storeInfo()
if err != nil {
- return nil, fmt.Errorf("error getting store info: %w", err)
+ return nil, fmt.Errorf("getting store info: %w", err)
}
info.Store = storeInfo
registries := make(map[string]interface{})
@@ -55,14 +49,14 @@ func (r *Runtime) info() (*define.Info, error) {
sys := r.SystemContext()
data, err := sysregistriesv2.GetRegistries(sys)
if err != nil {
- return nil, fmt.Errorf("error getting registries: %w", err)
+ return nil, fmt.Errorf("getting registries: %w", err)
}
for _, reg := range data {
registries[reg.Prefix] = reg
}
regs, err := sysregistriesv2.UnqualifiedSearchRegistries(sys)
if err != nil {
- return nil, fmt.Errorf("error getting registries: %w", err)
+ return nil, fmt.Errorf("getting registries: %w", err)
}
if len(regs) > 0 {
registries["search"] = regs
@@ -86,109 +80,45 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) {
// lets say OS, arch, number of cpus, amount of memory, maybe os distribution/version, hostname, kernel version, uptime
mi, err := system.ReadMemInfo()
if err != nil {
- return nil, fmt.Errorf("error reading memory info: %w", err)
+ return nil, fmt.Errorf("reading memory info: %w", err)
}
hostDistributionInfo := r.GetHostDistributionInfo()
- kv, err := readKernelVersion()
+ kv, err := util.ReadKernelVersion()
if err != nil {
- return nil, fmt.Errorf("error reading kernel version: %w", err)
+ return nil, fmt.Errorf("reading kernel version: %w", err)
}
host, err := os.Hostname()
if err != nil {
- return nil, fmt.Errorf("error getting hostname: %w", err)
+ return nil, fmt.Errorf("getting hostname: %w", err)
}
- seccompProfilePath, err := DefaultSeccompPath()
- if err != nil {
- return nil, fmt.Errorf("error getting Seccomp profile path: %w", err)
- }
-
- // Cgroups version
- unified, err := cgroups.IsCgroup2UnifiedMode()
- if err != nil {
- return nil, fmt.Errorf("error reading cgroups mode: %w", err)
- }
-
- // Get Map of all available controllers
- availableControllers, err := cgroups.GetAvailableControllers(nil, unified)
- if err != nil {
- return nil, fmt.Errorf("error getting available cgroup controllers: %w", err)
- }
cpuUtil, err := getCPUUtilization()
if err != nil {
return nil, err
}
info := define.HostInfo{
- Arch: runtime.GOARCH,
- BuildahVersion: buildah.Version,
- CgroupManager: r.config.Engine.CgroupManager,
- CgroupControllers: availableControllers,
- Linkmode: linkmode.Linkmode(),
- CPUs: runtime.NumCPU(),
- CPUUtilization: cpuUtil,
- Distribution: hostDistributionInfo,
- LogDriver: r.config.Containers.LogDriver,
- EventLogger: r.eventer.String(),
- Hostname: host,
- IDMappings: define.IDMappings{},
- Kernel: kv,
- MemFree: mi.MemFree,
- MemTotal: mi.MemTotal,
- NetworkBackend: r.config.Network.NetworkBackend,
- OS: runtime.GOOS,
- Security: define.SecurityInfo{
- AppArmorEnabled: apparmor.IsEnabled(),
- DefaultCapabilities: strings.Join(r.config.Containers.DefaultCapabilities, ","),
- Rootless: rootless.IsRootless(),
- SECCOMPEnabled: seccomp.IsEnabled(),
- SECCOMPProfilePath: seccompProfilePath,
- SELinuxEnabled: selinux.GetEnabled(),
- },
- Slirp4NetNS: define.SlirpInfo{},
- SwapFree: mi.SwapFree,
- SwapTotal: mi.SwapTotal,
- }
-
- cgroupVersion := "v1"
- if unified {
- cgroupVersion = "v2"
- }
- info.CgroupsVersion = cgroupVersion
-
- slirp4netnsPath := r.config.Engine.NetworkCmdPath
- if slirp4netnsPath == "" {
- slirp4netnsPath, _ = exec.LookPath("slirp4netns")
- }
- if slirp4netnsPath != "" {
- version, err := programVersion(slirp4netnsPath)
- if err != nil {
- logrus.Warnf("Failed to retrieve program version for %s: %v", slirp4netnsPath, err)
- }
- program := define.SlirpInfo{
- Executable: slirp4netnsPath,
- Package: packageVersion(slirp4netnsPath),
- Version: version,
- }
- info.Slirp4NetNS = program
- }
-
- if rootless.IsRootless() {
- uidmappings, err := rootless.ReadMappingsProc("/proc/self/uid_map")
- if err != nil {
- return nil, fmt.Errorf("error reading uid mappings: %w", err)
- }
- gidmappings, err := rootless.ReadMappingsProc("/proc/self/gid_map")
- if err != nil {
- return nil, fmt.Errorf("error reading gid mappings: %w", err)
- }
- idmappings := define.IDMappings{
- GIDMap: gidmappings,
- UIDMap: uidmappings,
- }
- info.IDMappings = idmappings
+ Arch: runtime.GOARCH,
+ BuildahVersion: buildah.Version,
+ Linkmode: linkmode.Linkmode(),
+ CPUs: runtime.NumCPU(),
+ CPUUtilization: cpuUtil,
+ Distribution: hostDistributionInfo,
+ LogDriver: r.config.Containers.LogDriver,
+ EventLogger: r.eventer.String(),
+ Hostname: host,
+ Kernel: kv,
+ MemFree: mi.MemFree,
+ MemTotal: mi.MemTotal,
+ NetworkBackend: r.config.Network.NetworkBackend,
+ OS: runtime.GOOS,
+ SwapFree: mi.SwapFree,
+ SwapTotal: mi.SwapTotal,
+ }
+ if err := r.setPlatformHostInfo(&info); err != nil {
+ return nil, err
}
conmonInfo, ociruntimeInfo, err := r.defaultOCIRuntime.RuntimeInfo()
@@ -199,9 +129,9 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) {
info.OCIRuntime = ociruntimeInfo
}
- duration, err := procUptime()
+ duration, err := util.ReadUptime()
if err != nil {
- return nil, fmt.Errorf("error reading up time: %w", err)
+ return nil, fmt.Errorf("reading up time: %w", err)
}
uptime := struct {
@@ -271,7 +201,7 @@ func (r *Runtime) storeInfo() (*define.StoreInfo, error) {
}
images, err := r.store.Images()
if err != nil {
- return nil, fmt.Errorf("error getting number of images: %w", err)
+ return nil, fmt.Errorf("getting number of images: %w", err)
}
conInfo, err := r.getContainerStoreInfo()
if err != nil {
@@ -329,31 +259,6 @@ func (r *Runtime) storeInfo() (*define.StoreInfo, error) {
return &info, nil
}
-func readKernelVersion() (string, error) {
- buf, err := ioutil.ReadFile("/proc/version")
- if err != nil {
- return "", err
- }
- f := bytes.Fields(buf)
- if len(f) < 3 {
- return string(bytes.TrimSpace(buf)), nil
- }
- return string(f[2]), nil
-}
-
-func procUptime() (time.Duration, error) {
- var zero time.Duration
- buf, err := ioutil.ReadFile("/proc/uptime")
- if err != nil {
- return zero, err
- }
- f := bytes.Fields(buf)
- if len(f) < 1 {
- return zero, errors.New("unable to parse uptime from /proc/uptime")
- }
- return time.ParseDuration(string(f[0]) + "s")
-}
-
// GetHostDistributionInfo returns a map containing the host's distribution and version
func (r *Runtime) GetHostDistributionInfo() define.DistributionInfo {
// Populate values in case we cannot find the values
@@ -385,43 +290,3 @@ func (r *Runtime) GetHostDistributionInfo() define.DistributionInfo {
}
return dist
}
-
-// getCPUUtilization Returns a CPUUsage object that summarizes CPU
-// usage for userspace, system, and idle time.
-func getCPUUtilization() (*define.CPUUsage, error) {
- f, err := os.Open("/proc/stat")
- if err != nil {
- return nil, err
- }
- defer f.Close()
- scanner := bufio.NewScanner(f)
- // Read first line of /proc/stat that has entries for system ("cpu" line)
- for scanner.Scan() {
- break
- }
- // column 1 is user, column 3 is system, column 4 is idle
- stats := strings.Fields(scanner.Text())
- return statToPercent(stats)
-}
-
-func statToPercent(stats []string) (*define.CPUUsage, error) {
- userTotal, err := strconv.ParseFloat(stats[1], 64)
- if err != nil {
- return nil, fmt.Errorf("unable to parse user value %q: %w", stats[1], err)
- }
- systemTotal, err := strconv.ParseFloat(stats[3], 64)
- if err != nil {
- return nil, fmt.Errorf("unable to parse system value %q: %w", stats[3], err)
- }
- idleTotal, err := strconv.ParseFloat(stats[4], 64)
- if err != nil {
- return nil, fmt.Errorf("unable to parse idle value %q: %w", stats[4], err)
- }
- total := userTotal + systemTotal + idleTotal
- s := define.CPUUsage{
- UserPercent: math.Round((userTotal/total*100)*100) / 100,
- SystemPercent: math.Round((systemTotal/total*100)*100) / 100,
- IdlePercent: math.Round((idleTotal/total*100)*100) / 100,
- }
- return &s, nil
-}
diff --git a/libpod/info_freebsd.go b/libpod/info_freebsd.go
new file mode 100644
index 000000000..1be988350
--- /dev/null
+++ b/libpod/info_freebsd.go
@@ -0,0 +1,40 @@
+package libpod
+
+import (
+ "fmt"
+ "unsafe"
+
+ "github.com/containers/podman/v4/libpod/define"
+ "golang.org/x/sys/unix"
+)
+
+func (r *Runtime) setPlatformHostInfo(info *define.HostInfo) error {
+ return nil
+}
+
+func timeToPercent(time uint64, total uint64) float64 {
+ return 100.0 * float64(time) / float64(total)
+}
+
+// getCPUUtilization returns a CPUUsage object that summarizes CPU
+// usage for userspace, system, and idle time.
+func getCPUUtilization() (*define.CPUUsage, error) {
+ buf, err := unix.SysctlRaw("kern.cp_time")
+ if err != nil {
+ return nil, fmt.Errorf("reading sysctl kern.cp_time: %w", err)
+ }
+
+ var total uint64 = 0
+ var times [unix.CPUSTATES]uint64
+
+ for i := 0; i < unix.CPUSTATES; i++ {
+ val := *(*uint64)(unsafe.Pointer(&buf[8*i]))
+ times[i] = val
+ total += val
+ }
+ return &define.CPUUsage{
+ UserPercent: timeToPercent(times[unix.CP_USER], total),
+ SystemPercent: timeToPercent(times[unix.CP_SYS], total),
+ IdlePercent: timeToPercent(times[unix.CP_IDLE], total),
+ }, nil
+}
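
The loop above reinterprets the raw sysctl buffer through unsafe.Pointer; on a 64-bit little-endian system an equivalent, bounds-checked decode could use encoding/binary instead. A sketch, assuming encoding/binary is imported and kern.cp_time returns unix.CPUSTATES 8-byte counters:

    // Alternative decode of the kern.cp_time buffer (sketch).
    var total uint64
    var times [unix.CPUSTATES]uint64
    for i := 0; i < unix.CPUSTATES; i++ {
        times[i] = binary.LittleEndian.Uint64(buf[8*i:])
        total += times[i]
    }
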
diff --git a/libpod/info_linux.go b/libpod/info_linux.go
new file mode 100644
index 000000000..44beafa8c
--- /dev/null
+++ b/libpod/info_linux.go
@@ -0,0 +1,132 @@
+package libpod
+
+import (
+ "bufio"
+ "fmt"
+ "math"
+ "os"
+ "os/exec"
+ "strconv"
+ "strings"
+
+ "github.com/containers/common/pkg/apparmor"
+ "github.com/containers/common/pkg/cgroups"
+ "github.com/containers/common/pkg/seccomp"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/sirupsen/logrus"
+)
+
+func (r *Runtime) setPlatformHostInfo(info *define.HostInfo) error {
+ seccompProfilePath, err := DefaultSeccompPath()
+ if err != nil {
+ return fmt.Errorf("getting Seccomp profile path: %w", err)
+ }
+
+ // Cgroups version
+ unified, err := cgroups.IsCgroup2UnifiedMode()
+ if err != nil {
+ return fmt.Errorf("reading cgroups mode: %w", err)
+ }
+
+ // Get Map of all available controllers
+ availableControllers, err := cgroups.GetAvailableControllers(nil, unified)
+ if err != nil {
+ return fmt.Errorf("getting available cgroup controllers: %w", err)
+ }
+
+ info.CgroupManager = r.config.Engine.CgroupManager
+ info.CgroupControllers = availableControllers
+ info.IDMappings = define.IDMappings{}
+ info.Security = define.SecurityInfo{
+ AppArmorEnabled: apparmor.IsEnabled(),
+ DefaultCapabilities: strings.Join(r.config.Containers.DefaultCapabilities, ","),
+ Rootless: rootless.IsRootless(),
+ SECCOMPEnabled: seccomp.IsEnabled(),
+ SECCOMPProfilePath: seccompProfilePath,
+ SELinuxEnabled: selinux.GetEnabled(),
+ }
+ info.Slirp4NetNS = define.SlirpInfo{}
+
+ cgroupVersion := "v1"
+ if unified {
+ cgroupVersion = "v2"
+ }
+ info.CgroupsVersion = cgroupVersion
+
+ slirp4netnsPath := r.config.Engine.NetworkCmdPath
+ if slirp4netnsPath == "" {
+ slirp4netnsPath, _ = exec.LookPath("slirp4netns")
+ }
+ if slirp4netnsPath != "" {
+ version, err := programVersion(slirp4netnsPath)
+ if err != nil {
+ logrus.Warnf("Failed to retrieve program version for %s: %v", slirp4netnsPath, err)
+ }
+ program := define.SlirpInfo{
+ Executable: slirp4netnsPath,
+ Package: packageVersion(slirp4netnsPath),
+ Version: version,
+ }
+ info.Slirp4NetNS = program
+ }
+
+ if rootless.IsRootless() {
+ uidmappings, err := rootless.ReadMappingsProc("/proc/self/uid_map")
+ if err != nil {
+ return fmt.Errorf("reading uid mappings: %w", err)
+ }
+ gidmappings, err := rootless.ReadMappingsProc("/proc/self/gid_map")
+ if err != nil {
+ return fmt.Errorf("reading gid mappings: %w", err)
+ }
+ idmappings := define.IDMappings{
+ GIDMap: gidmappings,
+ UIDMap: uidmappings,
+ }
+ info.IDMappings = idmappings
+ }
+
+ return nil
+}
+
+func statToPercent(stats []string) (*define.CPUUsage, error) {
+ userTotal, err := strconv.ParseFloat(stats[1], 64)
+ if err != nil {
+ return nil, fmt.Errorf("unable to parse user value %q: %w", stats[1], err)
+ }
+ systemTotal, err := strconv.ParseFloat(stats[3], 64)
+ if err != nil {
+ return nil, fmt.Errorf("unable to parse system value %q: %w", stats[3], err)
+ }
+ idleTotal, err := strconv.ParseFloat(stats[4], 64)
+ if err != nil {
+ return nil, fmt.Errorf("unable to parse idle value %q: %w", stats[4], err)
+ }
+ total := userTotal + systemTotal + idleTotal
+ s := define.CPUUsage{
+ UserPercent: math.Round((userTotal/total*100)*100) / 100,
+ SystemPercent: math.Round((systemTotal/total*100)*100) / 100,
+ IdlePercent: math.Round((idleTotal/total*100)*100) / 100,
+ }
+ return &s, nil
+}
+
+// getCPUUtilization returns a CPUUsage object that summarizes CPU
+// usage for userspace, system, and idle time.
+func getCPUUtilization() (*define.CPUUsage, error) {
+ f, err := os.Open("/proc/stat")
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+ scanner := bufio.NewScanner(f)
+ // Read first line of /proc/stat that has entries for system ("cpu" line)
+ for scanner.Scan() {
+ break
+ }
+ // column 1 is user, column 3 is system, column 4 is idle
+ stats := strings.Fields(scanner.Text())
+ return statToPercent(stats)
+}
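
A worked example with invented counter values, showing how the /proc/stat fields map to percentages:

    // First line of /proc/stat (fields: cpu user nice system idle ...):
    //   cpu  4705 150 1120 16250 520 0 70 0 0 0
    usage, err := statToPercent(strings.Fields("cpu 4705 150 1120 16250 520 0 70 0 0 0"))
    // total = 4705 + 1120 + 16250 = 22075, so:
    //   usage.UserPercent   == 21.31
    //   usage.SystemPercent == 5.07
    //   usage.IdlePercent   == 73.61
    _ = err
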
diff --git a/libpod/info_unsupported.go b/libpod/info_unsupported.go
new file mode 100644
index 000000000..0aed51247
--- /dev/null
+++ b/libpod/info_unsupported.go
@@ -0,0 +1,14 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "errors"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+func (r *Runtime) info() (*define.Info, error) {
+ return nil, errors.New("not implemented (*Runtime) info")
+}
diff --git a/libpod/kube.go b/libpod/kube.go
index 8c09a6bb5..1f4831006 100644
--- a/libpod/kube.go
+++ b/libpod/kube.go
@@ -62,6 +62,7 @@ func (p *Pod) GenerateForKube(ctx context.Context) (*v1.Pod, []v1.ServicePort, e
extraHost := make([]v1.HostAlias, 0)
hostNetwork := false
+ hostUsers := true
if p.HasInfraContainer() {
infraContainer, err := p.getInfraContainer()
if err != nil {
@@ -87,8 +88,9 @@ func (p *Pod) GenerateForKube(ctx context.Context) (*v1.Pod, []v1.ServicePort, e
return nil, servicePorts, err
}
hostNetwork = infraContainer.NetworkMode() == string(namespaces.NetworkMode(specgen.Host))
+ hostUsers = infraContainer.IDMappings().HostUIDMapping && infraContainer.IDMappings().HostGIDMapping
}
- pod, err := p.podWithContainers(ctx, allContainers, ports, hostNetwork)
+ pod, err := p.podWithContainers(ctx, allContainers, ports, hostNetwork, hostUsers)
if err != nil {
return nil, servicePorts, err
}
@@ -267,6 +269,8 @@ func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) (Y
}
service.Spec = serviceSpec
service.ObjectMeta = pod.ObjectMeta
+ // Reset the annotations for the service as the pod annotations are not needed for the service
+ service.ObjectMeta.Annotations = nil
tm := v12.TypeMeta{
Kind: "Service",
APIVersion: pod.TypeMeta.APIVersion,
@@ -346,7 +350,7 @@ func containersToServicePorts(containers []v1.Container) ([]v1.ServicePort, erro
return sps, nil
}
-func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, ports []v1.ContainerPort, hostNetwork bool) (*v1.Pod, error) {
+func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, ports []v1.ContainerPort, hostNetwork, hostUsers bool) (*v1.Pod, error) {
deDupPodVolumes := make(map[string]*v1.Volume)
first := true
podContainers := make([]v1.Container, 0, len(containers))
@@ -383,7 +387,7 @@ func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, po
return nil, err
}
for k, v := range annotations {
- podAnnotations[define.BindMountPrefix+k] = TruncateKubeAnnotation(v)
+ podAnnotations[define.BindMountPrefix] = TruncateKubeAnnotation(k + ":" + v)
}
// Since port bindings for the pod are handled by the
// infra container, wipe them here only if we are sharing the net namespace
@@ -444,10 +448,11 @@ func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, po
podVolumes,
&dnsInfo,
hostNetwork,
+ hostUsers,
hostname), nil
}
-func newPodObject(podName string, annotations map[string]string, initCtrs, containers []v1.Container, volumes []v1.Volume, dnsOptions *v1.PodDNSConfig, hostNetwork bool, hostname string) *v1.Pod {
+func newPodObject(podName string, annotations map[string]string, initCtrs, containers []v1.Container, volumes []v1.Volume, dnsOptions *v1.PodDNSConfig, hostNetwork, hostUsers bool, hostname string) *v1.Pod {
tm := v12.TypeMeta{
Kind: "Pod",
APIVersion: "v1",
@@ -466,12 +471,21 @@ func newPodObject(podName string, annotations map[string]string, initCtrs, conta
CreationTimestamp: v12.Now(),
Annotations: annotations,
}
+ // Set enableServiceLinks to false as podman doesn't use the service port environment variables
+ enableServiceLinks := false
+ // Set automountServiceAccountToken to false as podman doesn't use service account tokens
+ automountServiceAccountToken := false
ps := v1.PodSpec{
- Containers: containers,
- Hostname: hostname,
- HostNetwork: hostNetwork,
- InitContainers: initCtrs,
- Volumes: volumes,
+ Containers: containers,
+ Hostname: hostname,
+ HostNetwork: hostNetwork,
+ InitContainers: initCtrs,
+ Volumes: volumes,
+ EnableServiceLinks: &enableServiceLinks,
+ AutomountServiceAccountToken: &automountServiceAccountToken,
+ }
+ if !hostUsers {
+ ps.HostUsers = &hostUsers
}
if dnsOptions != nil && (len(dnsOptions.Nameservers)+len(dnsOptions.Searches)+len(dnsOptions.Options) > 0) {
ps.DNSConfig = dnsOptions
@@ -490,6 +504,7 @@ func simplePodWithV1Containers(ctx context.Context, ctrs []*Container) (*v1.Pod,
kubeCtrs := make([]v1.Container, 0, len(ctrs))
kubeInitCtrs := []v1.Container{}
kubeVolumes := make([]v1.Volume, 0)
+ hostUsers := true
hostNetwork := true
podDNS := v1.PodDNSConfig{}
kubeAnnotations := make(map[string]string)
@@ -519,12 +534,15 @@ func simplePodWithV1Containers(ctx context.Context, ctrs []*Container) (*v1.Pod,
if !ctr.HostNetwork() {
hostNetwork = false
}
+ if !(ctr.IDMappings().HostUIDMapping && ctr.IDMappings().HostGIDMapping) {
+ hostUsers = false
+ }
kubeCtr, kubeVols, ctrDNS, annotations, err := containerToV1Container(ctx, ctr)
if err != nil {
return nil, err
}
for k, v := range annotations {
- kubeAnnotations[define.BindMountPrefix+k] = TruncateKubeAnnotation(v)
+ kubeAnnotations[define.BindMountPrefix] = TruncateKubeAnnotation(k + ":" + v)
}
if isInit {
kubeInitCtrs = append(kubeInitCtrs, kubeCtr)
@@ -580,6 +598,7 @@ func simplePodWithV1Containers(ctx context.Context, ctrs []*Container) (*v1.Pod,
kubeVolumes,
&podDNS,
hostNetwork,
+ hostUsers,
hostname), nil
}
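
EnableServiceLinks, AutomountServiceAccountToken, and HostUsers are pointer-to-bool fields in the pod spec so that unset can be distinguished from false. A rough sketch of what the generated spec carries for a pod with a private user namespace; the hostname is invented:

    no := false
    ps := v1.PodSpec{
        Hostname:                     "mypod",
        EnableServiceLinks:           &no, // podman does not inject service env vars
        AutomountServiceAccountToken: &no, // podman does not use service account tokens
    }
    // HostUsers is only set when the pod actually uses a user namespace
    ps.HostUsers = &no
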
diff --git a/libpod/lock/file/file_lock.go b/libpod/lock/file/file_lock.go
index 1379e690a..bcbaea5e6 100644
--- a/libpod/lock/file/file_lock.go
+++ b/libpod/lock/file/file_lock.go
@@ -2,7 +2,6 @@ package file
import (
"fmt"
- "io/ioutil"
"os"
"path/filepath"
"strconv"
@@ -103,7 +102,7 @@ func (locks *FileLocks) AllocateGivenLock(lck uint32) error {
f, err := os.OpenFile(locks.getLockPath(lck), os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
if err != nil {
- return fmt.Errorf("error creating lock %d: %w", lck, err)
+ return fmt.Errorf("creating lock %d: %w", lck, err)
}
f.Close()
@@ -129,9 +128,9 @@ func (locks *FileLocks) DeallocateAllLocks() error {
if !locks.valid {
return fmt.Errorf("locks have already been closed: %w", syscall.EINVAL)
}
- files, err := ioutil.ReadDir(locks.lockPath)
+ files, err := os.ReadDir(locks.lockPath)
if err != nil {
- return fmt.Errorf("error reading directory %s: %w", locks.lockPath, err)
+ return fmt.Errorf("reading directory %s: %w", locks.lockPath, err)
}
var lastErr error
for _, f := range files {
@@ -153,7 +152,7 @@ func (locks *FileLocks) LockFileLock(lck uint32) error {
l, err := storage.GetLockfile(locks.getLockPath(lck))
if err != nil {
- return fmt.Errorf("error acquiring lock: %w", err)
+ return fmt.Errorf("acquiring lock: %w", err)
}
l.Lock()
@@ -167,7 +166,7 @@ func (locks *FileLocks) UnlockFileLock(lck uint32) error {
}
l, err := storage.GetLockfile(locks.getLockPath(lck))
if err != nil {
- return fmt.Errorf("error acquiring lock: %w", err)
+ return fmt.Errorf("acquiring lock: %w", err)
}
l.Unlock()
diff --git a/libpod/networking_common.go b/libpod/networking_common.go
new file mode 100644
index 000000000..fa444e26a
--- /dev/null
+++ b/libpod/networking_common.go
@@ -0,0 +1,719 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "errors"
+ "fmt"
+ "regexp"
+ "sort"
+
+ "github.com/containers/common/libnetwork/etchosts"
+ "github.com/containers/common/libnetwork/types"
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/machine"
+ "github.com/containers/common/pkg/util"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/libpod/events"
+ "github.com/containers/podman/v4/pkg/namespaces"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/containers/storage/pkg/lockfile"
+ "github.com/sirupsen/logrus"
+)
+
+// convertPortMappings will remove the HostIP part from the ports when running inside podman machine.
+// This is needed because a HostIP of 127.0.0.1 would not allow the gvproxy forwarder to reach the open ports.
+// For machine the HostIP must only be used by gvproxy and never in the VM.
+func (c *Container) convertPortMappings() []types.PortMapping {
+ if !machine.IsGvProxyBased() || len(c.config.PortMappings) == 0 {
+ return c.config.PortMappings
+ }
+ // if we run in a machine VM we have to ignore the host IP part
+ newPorts := make([]types.PortMapping, 0, len(c.config.PortMappings))
+ for _, port := range c.config.PortMappings {
+ port.HostIP = ""
+ newPorts = append(newPorts, port)
+ }
+ return newPorts
+}
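
A standalone sketch of the stripping with invented values: inside a gvproxy-based machine VM a 127.0.0.1 binding would be unreachable for the host-side forwarder, so the HostIP is cleared:

    ports := []types.PortMapping{
        {HostIP: "127.0.0.1", HostPort: 8080, ContainerPort: 80, Protocol: "tcp", Range: 1},
    }
    stripped := make([]types.PortMapping, 0, len(ports))
    for _, p := range ports {
        p.HostIP = "" // gvproxy on the host owns the HostIP part
        stripped = append(stripped, p)
    }
    // stripped[0] == {HostIP: "", HostPort: 8080, ContainerPort: 80, ...}
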
+
+func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOptions) types.NetworkOptions {
+ opts := types.NetworkOptions{
+ ContainerID: c.config.ID,
+ ContainerName: getCNIPodName(c),
+ }
+ opts.PortMappings = c.convertPortMappings()
+
+ // If the container requested special network options use this instead of the config.
+ // This is the case for container restore or network reload.
+ if c.perNetworkOpts != nil {
+ opts.Networks = c.perNetworkOpts
+ } else {
+ opts.Networks = networkOpts
+ }
+ return opts
+}
+
+// setUpNetwork will set up the networks; on error it will also tear down the
+// CNI networks. If rootless it will join/create the rootless network namespace.
+func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) {
+ rootlessNetNS, err := r.GetRootlessNetNs(true)
+ if err != nil {
+ return nil, err
+ }
+ var results map[string]types.StatusBlock
+ setUpPod := func() error {
+ results, err = r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts})
+ return err
+ }
+ // rootlessNetNS is nil if we are root
+ if rootlessNetNS != nil {
+ // execute the setup in the rootless net ns
+ err = rootlessNetNS.Do(setUpPod)
+ rootlessNetNS.Lock.Unlock()
+ } else {
+ err = setUpPod()
+ }
+ return results, err
+}
+
+// getCNIPodName returns the pod name (hostname) used by CNI and the dnsname plugin.
+// If we are in the pod network namespace use the pod name, otherwise the container name.
+func getCNIPodName(c *Container) string {
+ if c.config.NetMode.IsPod() || c.IsInfra() {
+ pod, err := c.runtime.state.Pod(c.PodID())
+ if err == nil {
+ return pod.Name()
+ }
+ }
+ return c.Name()
+}
+
+// teardownNetwork tears down a container's network configuration, joining the
+// rootless net ns as a rootless user where needed.
+func (r *Runtime) teardownNetwork(ns string, opts types.NetworkOptions) error {
+ rootlessNetNS, err := r.GetRootlessNetNs(false)
+ if err != nil {
+ return err
+ }
+ tearDownPod := func() error {
+ if err := r.network.Teardown(ns, types.TeardownOptions{NetworkOptions: opts}); err != nil {
+ return fmt.Errorf("tearing down network namespace configuration for container %s: %w", opts.ContainerID, err)
+ }
+ return nil
+ }
+
+ // rootlessNetNS is nil if we are root
+ if rootlessNetNS != nil {
+ // execute the cni setup in the rootless net ns
+ err = rootlessNetNS.Do(tearDownPod)
+ if cerr := rootlessNetNS.Cleanup(r); cerr != nil {
+ logrus.WithError(err).Error("failed to clean up rootless netns")
+ }
+ rootlessNetNS.Lock.Unlock()
+ } else {
+ err = tearDownPod()
+ }
+ return err
+}
+
+// Tear down a container's CNI network configuration, but do not tear down the
+// namespace itself.
+func (r *Runtime) teardownCNI(ctr *Container) error {
+ if ctr.state.NetNS == nil {
+ // The container has no network namespace, we're set
+ return nil
+ }
+
+ logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS.Path(), ctr.ID())
+
+ networks, err := ctr.networks()
+ if err != nil {
+ return err
+ }
+
+ if !ctr.config.NetMode.IsSlirp4netns() && len(networks) > 0 {
+ netOpts := ctr.getNetworkOptions(networks)
+ return r.teardownNetwork(ctr.state.NetNS.Path(), netOpts)
+ }
+ return nil
+}
+
+// isBridgeNetMode checks if the given network mode is bridge.
+// It returns nil when it is set to bridge and an error otherwise.
+func isBridgeNetMode(n namespaces.NetworkMode) error {
+ if !n.IsBridge() {
+ return fmt.Errorf("%q is not supported: %w", n, define.ErrNetworkModeInvalid)
+ }
+ return nil
+}
+
+// Reload only works with containers with a configured network.
+// It will tear down, and then reconfigure, the network of the container.
+// This is mainly used when a reload of firewall rules wipes out existing
+// firewall configuration.
+// Efforts will be made to preserve MAC and IP addresses, but this only works if
+// the container only joined a single CNI network, and was only assigned a
+// single MAC or IP.
+// Only works on root containers at present, though in the future we could
+// extend this to stop + restart slirp4netns
+func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) {
+ if ctr.state.NetNS == nil {
+ return nil, fmt.Errorf("container %s network is not configured, refusing to reload: %w", ctr.ID(), define.ErrCtrStateInvalid)
+ }
+ if err := isBridgeNetMode(ctr.config.NetMode); err != nil {
+ return nil, err
+ }
+ logrus.Infof("Going to reload container %s network", ctr.ID())
+
+ err := r.teardownCNI(ctr)
+ if err != nil {
+		// teardownCNI will error if the iptables rules do not exist, which is the case after
+		// a firewall reload. The purpose of network reload is to recreate the rules if they
+		// do not exist, so we should not log this specific error as an error; that would
+		// only confuse users. iptables-legacy and iptables-nft produce different error
+		// messages, so make sure to match both.
+ b, rerr := regexp.MatchString("Couldn't load target `CNI-[a-f0-9]{24}':No such file or directory|Chain 'CNI-[a-f0-9]{24}' does not exist", err.Error())
+ if rerr == nil && !b {
+ logrus.Error(err)
+ } else {
+ logrus.Info(err)
+ }
+ }
+
+ networkOpts, err := ctr.networks()
+ if err != nil {
+ return nil, err
+ }
+
+	// Set the same network settings as before.
+ netStatus := ctr.getNetworkStatus()
+ for network, perNetOpts := range networkOpts {
+ for name, netInt := range netStatus[network].Interfaces {
+ perNetOpts.InterfaceName = name
+ perNetOpts.StaticMAC = netInt.MacAddress
+ for _, netAddress := range netInt.Subnets {
+ perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
+ }
+			// Normally interfaces have a length of 1; only for some special CNI configs could we get more.
+			// For now just use the first interface to get the IPs; this should be good enough for most cases.
+ break
+ }
+ networkOpts[network] = perNetOpts
+ }
+ ctr.perNetworkOpts = networkOpts
+
+ return r.configureNetNS(ctr, ctr.state.NetNS)
+}
+
+// Produce an InspectNetworkSettings containing information on the container
+// network.
+func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) {
+ if c.config.NetNsCtr != "" {
+ netNsCtr, err := c.runtime.GetContainer(c.config.NetNsCtr)
+ if err != nil {
+ return nil, err
+ }
+ // see https://github.com/containers/podman/issues/10090
+ // the container has to be locked for syncContainer()
+ netNsCtr.lock.Lock()
+ defer netNsCtr.lock.Unlock()
+ // Have to sync to ensure that state is populated
+ if err := netNsCtr.syncContainer(); err != nil {
+ return nil, err
+ }
+ logrus.Debugf("Container %s shares network namespace, retrieving network info of container %s", c.ID(), c.config.NetNsCtr)
+
+ return netNsCtr.getContainerNetworkInfo()
+ }
+
+ settings := new(define.InspectNetworkSettings)
+ settings.Ports = makeInspectPortBindings(c.config.PortMappings, c.config.ExposedPorts)
+
+ networks, err := c.networks()
+ if err != nil {
+ return nil, err
+ }
+
+ if c.state.NetNS == nil {
+ if networkNSPath := c.joinedNetworkNSPath(); networkNSPath != "" {
+		result, err := c.inspectJoinedNetworkNS(networkNSPath)
+		if err == nil {
+			// fallback to dummy configuration
+			settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result)
+			return settings, nil
+		}
+		// do not propagate the error from inspecting a joined network ns;
+		// declaring err outside the if keeps it in scope here, so we log the
+		// inspect error rather than the earlier (nil) error from c.networks()
+		logrus.Errorf("Inspecting network namespace: %s of container %s: %v", networkNSPath, c.ID(), err)
+ }
+ // We can't do more if the network is down.
+
+ // We still want to make dummy configurations for each CNI net
+ // the container joined.
+ if len(networks) > 0 {
+ settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks))
+ for net, opts := range networks {
+ cniNet := new(define.InspectAdditionalNetwork)
+ cniNet.NetworkID = net
+ cniNet.Aliases = opts.Aliases
+ settings.Networks[net] = cniNet
+ }
+ }
+
+ return settings, nil
+ }
+
+ // Set network namespace path
+ settings.SandboxKey = c.state.NetNS.Path()
+
+ netStatus := c.getNetworkStatus()
+ // If this is empty, we're probably slirp4netns
+ if len(netStatus) == 0 {
+ return settings, nil
+ }
+
+ // If we have networks - handle that here
+ if len(networks) > 0 {
+ if len(networks) != len(netStatus) {
+ return nil, fmt.Errorf("network inspection mismatch: asked to join %d network(s) %v, but have information on %d network(s): %w", len(networks), networks, len(netStatus), define.ErrInternal)
+ }
+
+ settings.Networks = make(map[string]*define.InspectAdditionalNetwork)
+
+ for name, opts := range networks {
+ result := netStatus[name]
+ addedNet := new(define.InspectAdditionalNetwork)
+ addedNet.NetworkID = name
+ addedNet.Aliases = opts.Aliases
+ addedNet.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result)
+
+ settings.Networks[name] = addedNet
+ }
+
+		// if the container is connected to more than just the default network we can
+		// return here; otherwise we still have to populate the InspectBasicNetworkConfig settings
+ _, isDefaultNet := networks[c.runtime.config.Network.DefaultNetwork]
+ if !(len(networks) == 1 && isDefaultNet) {
+ return settings, nil
+ }
+ }
+
+ // If not joining networks, we should have at most 1 result
+ if len(netStatus) > 1 {
+ return nil, fmt.Errorf("should have at most 1 network status result if not joining networks, instead got %d: %w", len(netStatus), define.ErrInternal)
+ }
+
+ if len(netStatus) == 1 {
+ for _, status := range netStatus {
+ settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(status)
+ }
+ }
+ return settings, nil
+}
+
+// resultToBasicNetworkConfig produces an InspectBasicNetworkConfig from a CNI
+// result
+func resultToBasicNetworkConfig(result types.StatusBlock) define.InspectBasicNetworkConfig {
+ config := define.InspectBasicNetworkConfig{}
+ interfaceNames := make([]string, 0, len(result.Interfaces))
+ for interfaceName := range result.Interfaces {
+ interfaceNames = append(interfaceNames, interfaceName)
+ }
+ // ensure consistent inspect results by sorting
+ sort.Strings(interfaceNames)
+ for _, interfaceName := range interfaceNames {
+ netInt := result.Interfaces[interfaceName]
+ for _, netAddress := range netInt.Subnets {
+ size, _ := netAddress.IPNet.Mask.Size()
+ if netAddress.IPNet.IP.To4() != nil {
+ // ipv4
+ if config.IPAddress == "" {
+ config.IPAddress = netAddress.IPNet.IP.String()
+ config.IPPrefixLen = size
+ config.Gateway = netAddress.Gateway.String()
+ } else {
+ config.SecondaryIPAddresses = append(config.SecondaryIPAddresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size})
+ }
+ } else {
+ // ipv6
+ if config.GlobalIPv6Address == "" {
+ config.GlobalIPv6Address = netAddress.IPNet.IP.String()
+ config.GlobalIPv6PrefixLen = size
+ config.IPv6Gateway = netAddress.Gateway.String()
+ } else {
+ config.SecondaryIPv6Addresses = append(config.SecondaryIPv6Addresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size})
+ }
+ }
+ }
+ if config.MacAddress == "" {
+ config.MacAddress = netInt.MacAddress.String()
+ } else {
+ config.AdditionalMacAddresses = append(config.AdditionalMacAddresses, netInt.MacAddress.String())
+ }
+ }
+ return config
+}
+
+// NetworkDisconnect removes a container from the network
+func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) error {
+ // only the bridge mode supports cni networks
+ if err := isBridgeNetMode(c.config.NetMode); err != nil {
+ return err
+ }
+
+ c.lock.Lock()
+ defer c.lock.Unlock()
+
+ networks, err := c.networks()
+ if err != nil {
+ return err
+ }
+
+	// check if the network exists; if the input is an ID we resolve it to the name
+	// since CNI only uses names it is important that we only use the name
+ netName, err = c.runtime.normalizeNetworkName(netName)
+ if err != nil {
+ return err
+ }
+
+ _, nameExists := networks[netName]
+ if !nameExists && len(networks) > 0 {
+ return fmt.Errorf("container %s is not connected to network %s", nameOrID, netName)
+ }
+
+ if err := c.syncContainer(); err != nil {
+ return err
+ }
+ // get network status before we disconnect
+ networkStatus := c.getNetworkStatus()
+
+ if err := c.runtime.state.NetworkDisconnect(c, netName); err != nil {
+ return err
+ }
+
+ c.newNetworkEvent(events.NetworkDisconnect, netName)
+ if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
+ return nil
+ }
+
+ if c.state.NetNS == nil {
+ return fmt.Errorf("unable to disconnect %s from %s: %w", nameOrID, netName, define.ErrNoNetwork)
+ }
+
+ opts := types.NetworkOptions{
+ ContainerID: c.config.ID,
+ ContainerName: getCNIPodName(c),
+ }
+ opts.PortMappings = c.convertPortMappings()
+ opts.Networks = map[string]types.PerNetworkOptions{
+ netName: networks[netName],
+ }
+
+ if err := c.runtime.teardownNetwork(c.state.NetNS.Path(), opts); err != nil {
+ return err
+ }
+
+ // update network status if container is running
+ oldStatus, statusExist := networkStatus[netName]
+ delete(networkStatus, netName)
+ c.state.NetworkStatus = networkStatus
+ err = c.save()
+ if err != nil {
+ return err
+ }
+
+ // Reload ports when there are still connected networks, maybe we removed the network interface with the child ip.
+ // Reloading without connected networks does not make sense, so we can skip this step.
+ if rootless.IsRootless() && len(networkStatus) > 0 {
+ if err := c.reloadRootlessRLKPortMapping(); err != nil {
+ return err
+ }
+ }
+
+ // Update resolv.conf if required
+ if statusExist {
+ stringIPs := make([]string, 0, len(oldStatus.DNSServerIPs))
+ for _, ip := range oldStatus.DNSServerIPs {
+ stringIPs = append(stringIPs, ip.String())
+ }
+ if len(stringIPs) > 0 {
+ logrus.Debugf("Removing DNS Servers %v from resolv.conf", stringIPs)
+ if err := c.removeNameserver(stringIPs); err != nil {
+ return err
+ }
+ }
+
+ // update /etc/hosts file
+ if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
+ // sync the names with c.getHostsEntries()
+ names := []string{c.Hostname(), c.config.Name}
+ rm := etchosts.GetNetworkHostEntries(map[string]types.StatusBlock{netName: oldStatus}, names...)
+ if len(rm) > 0 {
+				// make sure to lock this file to prevent concurrent writes when
+				// this container is used as a network dependency
+ lock, err := lockfile.GetLockfile(file)
+ if err != nil {
+ return fmt.Errorf("failed to lock hosts file: %w", err)
+ }
+ logrus.Debugf("Remove /etc/hosts entries %v", rm)
+ lock.Lock()
+ err = etchosts.Remove(file, rm)
+ lock.Unlock()
+ if err != nil {
+ return err
+ }
+ }
+ }
+ }
+ return nil
+}
+
+// ConnectNetwork connects a container to a given network
+func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
+ // only the bridge mode supports cni networks
+ if err := isBridgeNetMode(c.config.NetMode); err != nil {
+ return err
+ }
+
+ c.lock.Lock()
+ defer c.lock.Unlock()
+
+ networks, err := c.networks()
+ if err != nil {
+ return err
+ }
+
+	// check if the network exists; if the input is an ID we resolve it to the name
+	// since CNI only uses names it is important that we only use the name
+ netName, err = c.runtime.normalizeNetworkName(netName)
+ if err != nil {
+ return err
+ }
+
+ if err := c.syncContainer(); err != nil {
+ return err
+ }
+
+ // get network status before we connect
+ networkStatus := c.getNetworkStatus()
+
+ // always add the short id as alias for docker compat
+ netOpts.Aliases = append(netOpts.Aliases, c.config.ID[:12])
+
+ if netOpts.InterfaceName == "" {
+ netOpts.InterfaceName = getFreeInterfaceName(networks)
+ if netOpts.InterfaceName == "" {
+ return errors.New("could not find free network interface name")
+ }
+ }
+
+ if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil {
+ // Docker compat: treat requests to attach already attached networks as a no-op, ignoring opts
+ if errors.Is(err, define.ErrNetworkConnected) && c.ensureState(define.ContainerStateConfigured) {
+ return nil
+ }
+
+ return err
+ }
+ c.newNetworkEvent(events.NetworkConnect, netName)
+ if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
+ return nil
+ }
+ if c.state.NetNS == nil {
+ return fmt.Errorf("unable to connect %s to %s: %w", nameOrID, netName, define.ErrNoNetwork)
+ }
+
+ opts := types.NetworkOptions{
+ ContainerID: c.config.ID,
+ ContainerName: getCNIPodName(c),
+ }
+ opts.PortMappings = c.convertPortMappings()
+ opts.Networks = map[string]types.PerNetworkOptions{
+ netName: netOpts,
+ }
+
+ results, err := c.runtime.setUpNetwork(c.state.NetNS.Path(), opts)
+ if err != nil {
+ return err
+ }
+ if len(results) != 1 {
+ return errors.New("when adding aliases, results must be of length 1")
+ }
+
+	// we need to get the old host entries before we add the new one to the status;
+	// if we do not do it here we will get the wrong existing entries, which would throw off the logic
+	// we could also copy the map, but that does not seem worth it
+ // sync the hostNames with c.getHostsEntries()
+ hostNames := []string{c.Hostname(), c.config.Name}
+ oldHostEntries := etchosts.GetNetworkHostEntries(networkStatus, hostNames...)
+
+ // update network status
+ if networkStatus == nil {
+ networkStatus = make(map[string]types.StatusBlock, 1)
+ }
+ networkStatus[netName] = results[netName]
+ c.state.NetworkStatus = networkStatus
+
+ err = c.save()
+ if err != nil {
+ return err
+ }
+
+ // The first network needs a port reload to set the correct child ip for the rootlessport process.
+ // Adding a second network does not require a port reload because the child ip is still valid.
+ if rootless.IsRootless() && len(networks) == 0 {
+ if err := c.reloadRootlessRLKPortMapping(); err != nil {
+ return err
+ }
+ }
+
+ ipv6, err := c.checkForIPv6(networkStatus)
+ if err != nil {
+ return err
+ }
+
+ // Update resolv.conf if required
+ stringIPs := make([]string, 0, len(results[netName].DNSServerIPs))
+ for _, ip := range results[netName].DNSServerIPs {
+ if (ip.To4() == nil) && !ipv6 {
+ continue
+ }
+ stringIPs = append(stringIPs, ip.String())
+ }
+ if len(stringIPs) > 0 {
+ logrus.Debugf("Adding DNS Servers %v to resolv.conf", stringIPs)
+ if err := c.addNameserver(stringIPs); err != nil {
+ return err
+ }
+ }
+
+ // update /etc/hosts file
+ if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
+		// make sure to lock this file to prevent concurrent writes when
+		// this container is used as a network dependency
+ lock, err := lockfile.GetLockfile(file)
+ if err != nil {
+ return fmt.Errorf("failed to lock hosts file: %w", err)
+ }
+		newHostEntries := etchosts.GetNetworkHostEntries(results, hostNames...)
+		logrus.Debugf("Add /etc/hosts entries %v", newHostEntries)
+		// use the special AddIfExists API to make sure we only add new entries if an old one exists;
+		// see the AddIfExists() comment for more information
+		lock.Lock()
+		err = etchosts.AddIfExists(file, oldHostEntries, newHostEntries)
+ lock.Unlock()
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// getFreeInterfaceName returns a free interface name for a new network,
+// or an empty string if no free name was found
+func getFreeInterfaceName(networks map[string]types.PerNetworkOptions) string {
+ ifNames := make([]string, 0, len(networks))
+ for _, opts := range networks {
+ ifNames = append(ifNames, opts.InterfaceName)
+ }
+ for i := 0; i < 100000; i++ {
+ ifName := fmt.Sprintf("eth%d", i)
+ if !util.StringInSlice(ifName, ifNames) {
+ return ifName
+ }
+ }
+ return ""
+}
+
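
For illustration, with eth0 and eth1 already taken the helper hands back eth2 (network names invented):

    nets := map[string]types.PerNetworkOptions{
        "podman": {InterfaceName: "eth0"},
        "mynet":  {InterfaceName: "eth1"},
    }
    name := getFreeInterfaceName(nets) // "eth2"
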
+// DisconnectContainerFromNetwork removes a container from its CNI network
+func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error {
+ ctr, err := r.LookupContainer(nameOrID)
+ if err != nil {
+ return err
+ }
+ return ctr.NetworkDisconnect(nameOrID, netName, force)
+}
+
+// ConnectContainerToNetwork connects a container to a CNI network
+func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
+ ctr, err := r.LookupContainer(nameOrID)
+ if err != nil {
+ return err
+ }
+ return ctr.NetworkConnect(nameOrID, netName, netOpts)
+}
+
+// normalizeNetworkName takes a network name, a partial or a full network ID and returns the network name.
+// If the network is not found an error is returned.
+func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) {
+ net, err := r.network.NetworkInspect(nameOrID)
+ if err != nil {
+ return "", err
+ }
+ return net.Name, nil
+}
+
+// ocicniPortsToNetTypesPorts converts the old port format to the new one
+// while deduplicating ports into ranges
+func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping {
+ if len(ports) == 0 {
+ return nil
+ }
+
+ newPorts := make([]types.PortMapping, 0, len(ports))
+
+ // first sort the ports
+ sort.Slice(ports, func(i, j int) bool {
+ return compareOCICNIPorts(ports[i], ports[j])
+ })
+
+ // we already check if the slice is empty so we can use the first element
+ currentPort := types.PortMapping{
+ HostIP: ports[0].HostIP,
+ HostPort: uint16(ports[0].HostPort),
+ ContainerPort: uint16(ports[0].ContainerPort),
+ Protocol: ports[0].Protocol,
+ Range: 1,
+ }
+
+ for i := 1; i < len(ports); i++ {
+ if ports[i].HostIP == currentPort.HostIP &&
+ ports[i].Protocol == currentPort.Protocol &&
+ ports[i].HostPort-int32(currentPort.Range) == int32(currentPort.HostPort) &&
+ ports[i].ContainerPort-int32(currentPort.Range) == int32(currentPort.ContainerPort) {
+ currentPort.Range++
+ } else {
+ newPorts = append(newPorts, currentPort)
+ currentPort = types.PortMapping{
+ HostIP: ports[i].HostIP,
+ HostPort: uint16(ports[i].HostPort),
+ ContainerPort: uint16(ports[i].ContainerPort),
+ Protocol: ports[i].Protocol,
+ Range: 1,
+ }
+ }
+ }
+ newPorts = append(newPorts, currentPort)
+ return newPorts
+}
+
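
A worked example of the range deduplication with invented values: three consecutive host/container port pairs collapse into a single entry with Range 3:

    in := []types.OCICNIPortMapping{
        {HostPort: 8081, ContainerPort: 81, Protocol: "tcp"},
        {HostPort: 8080, ContainerPort: 80, Protocol: "tcp"},
        {HostPort: 8082, ContainerPort: 82, Protocol: "tcp"},
    }
    out := ocicniPortsToNetTypesPorts(in) // sorts first, then merges
    // out == []types.PortMapping{
    //     {HostPort: 8080, ContainerPort: 80, Protocol: "tcp", Range: 3},
    // }
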
+// compareOCICNIPorts will sort the ocicni ports by
+// 1) host ip
+// 2) protocol
+// 3) hostPort
+// 4) container port
+func compareOCICNIPorts(i, j types.OCICNIPortMapping) bool {
+ if i.HostIP != j.HostIP {
+ return i.HostIP < j.HostIP
+ }
+
+ if i.Protocol != j.Protocol {
+ return i.Protocol < j.Protocol
+ }
+
+ if i.HostPort != j.HostPort {
+ return i.HostPort < j.HostPort
+ }
+
+ return i.ContainerPort < j.ContainerPort
+}
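
A hedged usage sketch of the runtime-level wrappers; `runtime` (a *Runtime) and both names are assumptions for illustration:

    // Attach a running container to an additional network with an alias.
    err := runtime.ConnectContainerToNetwork("mycontainer", "mynet", types.PerNetworkOptions{
        Aliases: []string{"db"},
    })
    if err != nil {
        logrus.Errorf("Connecting container: %v", err)
    }
    // ...and detach it again; note force is unused by NetworkDisconnect above.
    if err := runtime.DisconnectContainerFromNetwork("mycontainer", "mynet", false); err != nil {
        logrus.Errorf("Disconnecting container: %v", err)
    }
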
diff --git a/libpod/networking_freebsd.go b/libpod/networking_freebsd.go
new file mode 100644
index 000000000..230efc99d
--- /dev/null
+++ b/libpod/networking_freebsd.go
@@ -0,0 +1,268 @@
+//go:build freebsd
+// +build freebsd
+
+package libpod
+
+import (
+ "crypto/rand"
+ jdec "encoding/json"
+ "errors"
+ "fmt"
+ "net"
+ "os/exec"
+ "path/filepath"
+
+ "github.com/containers/buildah/pkg/jail"
+ "github.com/containers/common/libnetwork/types"
+ "github.com/containers/storage/pkg/lockfile"
+ "github.com/sirupsen/logrus"
+)
+
+type Netstat struct {
+ Statistics NetstatInterface `json:"statistics"`
+}
+
+type NetstatInterface struct {
+ Interface []NetstatAddress `json:"interface"`
+}
+
+type NetstatAddress struct {
+ Name string `json:"name"`
+ Flags string `json:"flags"`
+ Mtu int `json:"mtu"`
+ Network string `json:"network"`
+ Address string `json:"address"`
+
+ ReceivedPackets uint64 `json:"received-packets"`
+ ReceivedBytes uint64 `json:"received-bytes"`
+ ReceivedErrors uint64 `json:"received-errors"`
+
+ SentPackets uint64 `json:"sent-packets"`
+ SentBytes uint64 `json:"sent-bytes"`
+ SentErrors uint64 `json:"send-errors"`
+
+ DroppedPackets uint64 `json:"dropped-packets"`
+
+ Collisions uint64 `json:"collisions"`
+}
+
+// copied from github.com/vishvananda/netlink which does not build on freebsd
+type LinkStatistics64 struct {
+ RxPackets uint64
+ TxPackets uint64
+ RxBytes uint64
+ TxBytes uint64
+ RxErrors uint64
+ TxErrors uint64
+ RxDropped uint64
+ TxDropped uint64
+ Multicast uint64
+ Collisions uint64
+ RxLengthErrors uint64
+ RxOverErrors uint64
+ RxCrcErrors uint64
+ RxFrameErrors uint64
+ RxFifoErrors uint64
+ RxMissedErrors uint64
+ TxAbortedErrors uint64
+ TxCarrierErrors uint64
+ TxFifoErrors uint64
+ TxHeartbeatErrors uint64
+ TxWindowErrors uint64
+ RxCompressed uint64
+ TxCompressed uint64
+}
+
+type RootlessNetNS struct {
+ dir string
+ Lock lockfile.Locker
+}
+
+// getPath will join the given path to the rootless netns dir
+func (r *RootlessNetNS) getPath(path string) string {
+ return filepath.Join(r.dir, path)
+}
+
+// Do - run the given function in the rootless netns.
+// It does not lock the rootlessCNI lock, the caller
+// should only lock when needed, e.g. for cni operations.
+func (r *RootlessNetNS) Do(toRun func() error) error {
+ return errors.New("not supported on freebsd")
+}
+
+// Cleanup the rootless network namespace if needed.
+// It checks if we have running containers with the bridge network mode.
+// Cleanup() expects that r.Lock is locked
+func (r *RootlessNetNS) Cleanup(runtime *Runtime) error {
+ return errors.New("not supported on freebsd")
+}
+
+// GetRootlessNetNs returns the rootless netns object. If new is set to true
+// the rootless network namespace will be created if it does not exist already.
+// If called as root it always returns nil.
+// On success the returned RootlessCNI lock is locked and must be unlocked by the caller.
+func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
+ return nil, nil
+}
+
+func GetSlirp4netnsIP(subnet *net.IPNet) (*net.IP, error) {
+ return nil, errors.New("not implemented GetSlirp4netnsIP")
+}
+
+// While there is code in container_internal.go which calls this, in
+// my testing network creation always seems to go through createNetNS.
+func (r *Runtime) setupNetNS(ctr *Container) error {
+ return errors.New("not implemented (*Runtime) setupNetNS")
+}
+
+// Create and configure a new network namespace for a container
+func (r *Runtime) configureNetNS(ctr *Container, ctrNS *jailNetNS) (status map[string]types.StatusBlock, rerr error) {
+ if err := r.exposeMachinePorts(ctr.config.PortMappings); err != nil {
+ return nil, err
+ }
+ defer func() {
+ // make sure to unexpose the gvproxy ports when an error happens
+ if rerr != nil {
+ if err := r.unexposeMachinePorts(ctr.config.PortMappings); err != nil {
+ logrus.Errorf("failed to free gvproxy machine ports: %v", err)
+ }
+ }
+ }()
+ networks, err := ctr.networks()
+ if err != nil {
+ return nil, err
+ }
+ // All networks have been removed from the container.
+ // This is effectively forcing net=none.
+ if len(networks) == 0 {
+ return nil, nil
+ }
+
+ netOpts := ctr.getNetworkOptions(networks)
+ netStatus, err := r.setUpNetwork(ctrNS.Name, netOpts)
+ if err != nil {
+ return nil, err
+ }
+
+ return netStatus, err
+}
+
+// Create and configure a new network namespace for a container
+func (r *Runtime) createNetNS(ctr *Container) (n *jailNetNS, q map[string]types.StatusBlock, retErr error) {
+ b := make([]byte, 16)
+ _, err := rand.Reader.Read(b)
+ if err != nil {
+		return nil, nil, fmt.Errorf("failed to generate random vnet name: %w", err)
+ }
+ ctrNS := &jailNetNS{Name: fmt.Sprintf("vnet-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])}
+
+ jconf := jail.NewConfig()
+ jconf.Set("name", ctrNS.Name)
+ jconf.Set("vnet", jail.NEW)
+ jconf.Set("children.max", 1)
+ jconf.Set("persist", true)
+ jconf.Set("enforce_statfs", 0)
+ jconf.Set("devfs_ruleset", 4)
+ jconf.Set("allow.raw_sockets", true)
+ jconf.Set("allow.chflags", true)
+ jconf.Set("securelevel", -1)
+	if _, err := jail.Create(jconf); err != nil {
+		return nil, nil, fmt.Errorf("creating vnet jail %s for container %s: %w", ctrNS.Name, ctr.ID(), err)
+	}
+
+ logrus.Debugf("Created vnet jail %s for container %s", ctrNS.Name, ctr.ID())
+
+ var networkStatus map[string]types.StatusBlock
+ networkStatus, err = r.configureNetNS(ctr, ctrNS)
+ return ctrNS, networkStatus, err
+}
+
+// Tear down a network namespace, undoing all state associated with it.
+func (r *Runtime) teardownNetNS(ctr *Container) error {
+ if err := r.unexposeMachinePorts(ctr.config.PortMappings); err != nil {
+ // do not return an error otherwise we would prevent network cleanup
+ logrus.Errorf("failed to free gvproxy machine ports: %v", err)
+ }
+ if err := r.teardownCNI(ctr); err != nil {
+ return err
+ }
+
+ if ctr.state.NetNS != nil {
+ // Rather than destroying the jail immediately, reset the
+ // persist flag so that it will live until the container is
+ // done.
+ netjail, err := jail.FindByName(ctr.state.NetNS.Name)
+ if err != nil {
+ return fmt.Errorf("finding network jail %s: %w", ctr.state.NetNS.Name, err)
+ }
+ jconf := jail.NewConfig()
+ jconf.Set("persist", false)
+ if err := netjail.Set(jconf); err != nil {
+ return fmt.Errorf("releasing network jail %s: %w", ctr.state.NetNS.Name, err)
+ }
+
+ ctr.state.NetNS = nil
+ }
+
+ return nil
+}
+
+func getContainerNetIO(ctr *Container) (*LinkStatistics64, error) {
+ if ctr.state.NetNS == nil {
+ // If NetNS is nil, the network was set to none and no netns was
+ // set up. This is a valid state, so return neither an error nor
+ // any statistics.
+ return nil, nil
+ }
+
+ // FIXME get the interface from the container netstatus
+ cmd := exec.Command("jexec", ctr.state.NetNS.Name, "netstat", "-bI", "eth0", "--libxo", "json")
+ out, err := cmd.Output()
+ if err != nil {
+ return nil, err
+ }
+ stats := Netstat{}
+ if err := jdec.Unmarshal(out, &stats); err != nil {
+ return nil, err
+ }
+
+ // Find the link stats
+ for _, ifaddr := range stats.Statistics.Interface {
+ if ifaddr.Mtu > 0 {
+ return &LinkStatistics64{
+ RxPackets: ifaddr.ReceivedPackets,
+ TxPackets: ifaddr.SentPackets,
+ RxBytes: ifaddr.ReceivedBytes,
+ TxBytes: ifaddr.SentBytes,
+ RxErrors: ifaddr.ReceivedErrors,
+ TxErrors: ifaddr.SentErrors,
+ RxDropped: ifaddr.DroppedPackets,
+ Collisions: ifaddr.Collisions,
+ }, nil
+ }
+ }
+
+ return &LinkStatistics64{}, nil
+}
+
+func (c *Container) joinedNetworkNSPath() string {
+ if c.state.NetNS != nil {
+ return c.state.NetNS.Name
+ }
+ return ""
+}
+
+func (c *Container) inspectJoinedNetworkNS(networkns string) (q types.StatusBlock, retErr error) {
+ // TODO: extract interface information from the vnet jail
+ return types.StatusBlock{}, nil
+}
+
+func (c *Container) reloadRootlessRLKPortMapping() error {
+ return errors.New("unsupported (*Container).reloadRootlessRLKPortMapping")
+}
+
+func (c *Container) setupRootlessNetwork() error {
+ return nil
+}
diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go
index c05796768..e27ec8e9d 100644
--- a/libpod/networking_linux.go
+++ b/libpod/networking_linux.go
@@ -13,25 +13,17 @@ import (
"os"
"os/exec"
"path/filepath"
- "regexp"
- "sort"
"strconv"
"strings"
"syscall"
"time"
"github.com/containernetworking/plugins/pkg/ns"
- "github.com/containers/common/libnetwork/etchosts"
"github.com/containers/common/libnetwork/resolvconf"
"github.com/containers/common/libnetwork/types"
- "github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/machine"
"github.com/containers/common/pkg/netns"
"github.com/containers/common/pkg/util"
- "github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/errorhandling"
- "github.com/containers/podman/v4/pkg/namespaces"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/utils"
"github.com/containers/storage/pkg/lockfile"
@@ -59,39 +51,6 @@ const (
persistentCNIDir = "/var/lib/cni"
)
-// convertPortMappings will remove the HostIP part from the ports when running inside podman machine.
-// This is need because a HostIP of 127.0.0.1 would now allow the gvproxy forwarder to reach to open ports.
-// For machine the HostIP must only be used by gvproxy and never in the VM.
-func (c *Container) convertPortMappings() []types.PortMapping {
- if !machine.IsGvProxyBased() || len(c.config.PortMappings) == 0 {
- return c.config.PortMappings
- }
- // if we run in a machine VM we have to ignore the host IP part
- newPorts := make([]types.PortMapping, 0, len(c.config.PortMappings))
- for _, port := range c.config.PortMappings {
- port.HostIP = ""
- newPorts = append(newPorts, port)
- }
- return newPorts
-}
-
-func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOptions) types.NetworkOptions {
- opts := types.NetworkOptions{
- ContainerID: c.config.ID,
- ContainerName: getCNIPodName(c),
- }
- opts.PortMappings = c.convertPortMappings()
-
- // If the container requested special network options use this instead of the config.
- // This is the case for container restore or network reload.
- if c.perNetworkOpts != nil {
- opts.Networks = c.perNetworkOpts
- } else {
- opts.Networks = networkOpts
- }
- return opts
-}
-
type RootlessNetNS struct {
ns ns.NetNS
dir string
@@ -354,7 +313,7 @@ func (r *RootlessNetNS) Cleanup(runtime *Runtime) error {
}
}
if err != nil {
- logrus.Errorf("Failed to kill slirp4netns process: %s", err)
+ logrus.Errorf("Failed to kill slirp4netns process: %v", err)
}
err = os.RemoveAll(r.dir)
if err != nil {
@@ -411,13 +370,13 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
if err != nil {
if !new {
// return an error if we could not get the namespace and should not create one
- return nil, fmt.Errorf("error getting rootless network namespace: %w", err)
+ return nil, fmt.Errorf("getting rootless network namespace: %w", err)
}
// create a new namespace
logrus.Debugf("creating rootless network namespace with name %q", netnsName)
ns, err = netns.NewNSWithName(netnsName)
if err != nil {
- return nil, fmt.Errorf("error creating rootless network namespace: %w", err)
+ return nil, fmt.Errorf("creating rootless network namespace: %w", err)
}
// set up slirp4netns here
path := r.config.Engine.NetworkCmdPath
@@ -442,7 +401,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
}
slirpFeatures, err := checkSlirpFlags(path)
if err != nil {
- return nil, fmt.Errorf("error checking slirp4netns binary %s: %q: %w", path, err, err)
+ return nil, fmt.Errorf("checking slirp4netns binary %s: %q: %w", path, err, err)
}
cmdArgs, err := createBasicSlirp4netnsCmdArgs(netOptions, slirpFeatures)
if err != nil {
@@ -589,41 +548,6 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
return rootlessNetNS, nil
}
-// setUpNetwork will set up the the networks, on error it will also tear down the cni
-// networks. If rootless it will join/create the rootless network namespace.
-func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) {
- rootlessNetNS, err := r.GetRootlessNetNs(true)
- if err != nil {
- return nil, err
- }
- var results map[string]types.StatusBlock
- setUpPod := func() error {
- results, err = r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts})
- return err
- }
- // rootlessNetNS is nil if we are root
- if rootlessNetNS != nil {
- // execute the setup in the rootless net ns
- err = rootlessNetNS.Do(setUpPod)
- rootlessNetNS.Lock.Unlock()
- } else {
- err = setUpPod()
- }
- return results, err
-}
-
-// getCNIPodName return the pod name (hostname) used by CNI and the dnsname plugin.
-// If we are in the pod network namespace use the pod name otherwise the container name
-func getCNIPodName(c *Container) string {
- if c.config.NetMode.IsPod() || c.IsInfra() {
- pod, err := c.runtime.state.Pod(c.PodID())
- if err == nil {
- return pod.Name()
- }
- }
- return c.Name()
-}
-
// Create and configure a new network namespace for a container
func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) (status map[string]types.StatusBlock, rerr error) {
if err := r.exposeMachinePorts(ctr.config.PortMappings); err != nil {
@@ -675,7 +599,7 @@ func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) (status map[str
func (r *Runtime) createNetNS(ctr *Container) (n ns.NetNS, q map[string]types.StatusBlock, retErr error) {
ctrNS, err := netns.NewNS()
if err != nil {
- return nil, nil, fmt.Errorf("error creating network namespace for container %s: %w", ctr.ID(), err)
+ return nil, nil, fmt.Errorf("creating network namespace for container %s: %w", ctr.ID(), err)
}
defer func() {
if retErr != nil {
@@ -742,7 +666,7 @@ func (r *Runtime) setupNetNS(ctr *Container) error {
func joinNetNS(path string) (ns.NetNS, error) {
netNS, err := ns.GetNS(path)
if err != nil {
- return nil, fmt.Errorf("error retrieving network namespace at %s: %w", path, err)
+ return nil, fmt.Errorf("retrieving network namespace at %s: %w", path, err)
}
return netNS, nil
@@ -758,7 +682,7 @@ func (r *Runtime) closeNetNS(ctr *Container) error {
}
if err := ctr.state.NetNS.Close(); err != nil {
- return fmt.Errorf("error closing network namespace for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("closing network namespace for container %s: %w", ctr.ID(), err)
}
ctr.state.NetNS = nil
@@ -766,56 +690,6 @@ func (r *Runtime) closeNetNS(ctr *Container) error {
return nil
}
-// Tear down a container's network configuration and joins the
-// rootless net ns as rootless user
-func (r *Runtime) teardownNetwork(ns string, opts types.NetworkOptions) error {
- rootlessNetNS, err := r.GetRootlessNetNs(false)
- if err != nil {
- return err
- }
- tearDownPod := func() error {
- if err := r.network.Teardown(ns, types.TeardownOptions{NetworkOptions: opts}); err != nil {
- return fmt.Errorf("error tearing down network namespace configuration for container %s: %w", opts.ContainerID, err)
- }
- return nil
- }
-
- // rootlessNetNS is nil if we are root
- if rootlessNetNS != nil {
- // execute the cni setup in the rootless net ns
- err = rootlessNetNS.Do(tearDownPod)
- if cerr := rootlessNetNS.Cleanup(r); cerr != nil {
- logrus.WithError(err).Error("failed to clean up rootless netns")
- }
- rootlessNetNS.Lock.Unlock()
- } else {
- err = tearDownPod()
- }
- return err
-}
-
-// Tear down a container's CNI network configuration, but do not tear down the
-// namespace itself.
-func (r *Runtime) teardownCNI(ctr *Container) error {
- if ctr.state.NetNS == nil {
- // The container has no network namespace, we're set
- return nil
- }
-
- logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS.Path(), ctr.ID())
-
- networks, err := ctr.networks()
- if err != nil {
- return err
- }
-
- if !ctr.config.NetMode.IsSlirp4netns() && len(networks) > 0 {
- netOpts := ctr.getNetworkOptions(networks)
- return r.teardownNetwork(ctr.state.NetNS.Path(), netOpts)
- }
- return nil
-}
-
// Tear down a network namespace, undoing all state associated with it.
func (r *Runtime) teardownNetNS(ctr *Container) error {
if err := r.unexposeMachinePorts(ctr.config.PortMappings); err != nil {
@@ -828,12 +702,12 @@ func (r *Runtime) teardownNetNS(ctr *Container) error {
// First unmount the namespace
if err := netns.UnmountNS(ctr.state.NetNS); err != nil {
- return fmt.Errorf("error unmounting network namespace for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("unmounting network namespace for container %s: %w", ctr.ID(), err)
}
// Now close the open file descriptor
if err := ctr.state.NetNS.Close(); err != nil {
- return fmt.Errorf("error closing network namespace for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("closing network namespace for container %s: %w", ctr.ID(), err)
}
ctr.state.NetNS = nil
@@ -862,72 +736,6 @@ func getContainerNetNS(ctr *Container) (string, *Container, error) {
return "", nil, nil
}
-// isBridgeNetMode checks if the given network mode is bridge.
-// It returns nil when it is set to bridge and an error otherwise.
-func isBridgeNetMode(n namespaces.NetworkMode) error {
- if !n.IsBridge() {
- return fmt.Errorf("%q is not supported: %w", n, define.ErrNetworkModeInvalid)
- }
- return nil
-}
-
-// Reload only works with containers with a configured network.
-// It will tear down, and then reconfigure, the network of the container.
-// This is mainly used when a reload of firewall rules wipes out existing
-// firewall configuration.
-// Efforts will be made to preserve MAC and IP addresses, but this only works if
-// the container only joined a single CNI network, and was only assigned a
-// single MAC or IP.
-// Only works on root containers at present, though in the future we could
-// extend this to stop + restart slirp4netns
-func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) {
- if ctr.state.NetNS == nil {
- return nil, fmt.Errorf("container %s network is not configured, refusing to reload: %w", ctr.ID(), define.ErrCtrStateInvalid)
- }
- if err := isBridgeNetMode(ctr.config.NetMode); err != nil {
- return nil, err
- }
- logrus.Infof("Going to reload container %s network", ctr.ID())
-
- err := r.teardownCNI(ctr)
- if err != nil {
- // teardownCNI will error if the iptables rules do not exists and this is the case after
- // a firewall reload. The purpose of network reload is to recreate the rules if they do
- // not exists so we should not log this specific error as error. This would confuse users otherwise.
- // iptables-legacy and iptables-nft will create different errors make sure to match both.
- b, rerr := regexp.MatchString("Couldn't load target `CNI-[a-f0-9]{24}':No such file or directory|Chain 'CNI-[a-f0-9]{24}' does not exist", err.Error())
- if rerr == nil && !b {
- logrus.Error(err)
- } else {
- logrus.Info(err)
- }
- }
-
- networkOpts, err := ctr.networks()
- if err != nil {
- return nil, err
- }
-
- // Set the same network settings as before..
- netStatus := ctr.getNetworkStatus()
- for network, perNetOpts := range networkOpts {
- for name, netInt := range netStatus[network].Interfaces {
- perNetOpts.InterfaceName = name
- perNetOpts.StaticMAC = netInt.MacAddress
- for _, netAddress := range netInt.Subnets {
- perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
- }
- // Normally interfaces have a length of 1, only for some special cni configs we could get more.
- // For now just use the first interface to get the ips this should be good enough for most cases.
- break
- }
- networkOpts[network] = perNetOpts
- }
- ctr.perNetworkOpts = networkOpts
-
- return r.configureNetNS(ctr, ctr.state.NetNS)
-}
-
// TODO (5.0): return the statistics per network interface
// This would allow better compat with docker.
func getContainerNetIO(ctr *Container) (*netlink.LinkStatistics, error) {
@@ -981,110 +789,6 @@ func getContainerNetIO(ctr *Container) (*netlink.LinkStatistics, error) {
return netStats, err
}
-// Produce an InspectNetworkSettings containing information on the container
-// network.
-func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) {
- if c.config.NetNsCtr != "" {
- netNsCtr, err := c.runtime.GetContainer(c.config.NetNsCtr)
- if err != nil {
- return nil, err
- }
- // see https://github.com/containers/podman/issues/10090
- // the container has to be locked for syncContainer()
- netNsCtr.lock.Lock()
- defer netNsCtr.lock.Unlock()
- // Have to sync to ensure that state is populated
- if err := netNsCtr.syncContainer(); err != nil {
- return nil, err
- }
- logrus.Debugf("Container %s shares network namespace, retrieving network info of container %s", c.ID(), c.config.NetNsCtr)
-
- return netNsCtr.getContainerNetworkInfo()
- }
-
- settings := new(define.InspectNetworkSettings)
- settings.Ports = makeInspectPortBindings(c.config.PortMappings, c.config.ExposedPorts)
-
- networks, err := c.networks()
- if err != nil {
- return nil, err
- }
-
- if c.state.NetNS == nil {
- if networkNSPath := c.joinedNetworkNSPath(); networkNSPath != "" {
- if result, err := c.inspectJoinedNetworkNS(networkNSPath); err == nil {
- // fallback to dummy configuration
- settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result)
- return settings, nil
- }
- // do not propagate error inspecting a joined network ns
- logrus.Errorf("Inspecting network namespace: %s of container %s: %v", networkNSPath, c.ID(), err)
- }
- // We can't do more if the network is down.
-
- // We still want to make dummy configurations for each CNI net
- // the container joined.
- if len(networks) > 0 {
- settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks))
- for net, opts := range networks {
- cniNet := new(define.InspectAdditionalNetwork)
- cniNet.NetworkID = net
- cniNet.Aliases = opts.Aliases
- settings.Networks[net] = cniNet
- }
- }
-
- return settings, nil
- }
-
- // Set network namespace path
- settings.SandboxKey = c.state.NetNS.Path()
-
- netStatus := c.getNetworkStatus()
- // If this is empty, we're probably slirp4netns
- if len(netStatus) == 0 {
- return settings, nil
- }
-
- // If we have networks - handle that here
- if len(networks) > 0 {
- if len(networks) != len(netStatus) {
- return nil, fmt.Errorf("network inspection mismatch: asked to join %d network(s) %v, but have information on %d network(s): %w", len(networks), networks, len(netStatus), define.ErrInternal)
- }
-
- settings.Networks = make(map[string]*define.InspectAdditionalNetwork)
-
- for name, opts := range networks {
- result := netStatus[name]
- addedNet := new(define.InspectAdditionalNetwork)
- addedNet.NetworkID = name
- addedNet.Aliases = opts.Aliases
- addedNet.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result)
-
- settings.Networks[name] = addedNet
- }
-
- // if not only the default network is connected we can return here
- // otherwise we have to populate the InspectBasicNetworkConfig settings
- _, isDefaultNet := networks[c.runtime.config.Network.DefaultNetwork]
- if !(len(networks) == 1 && isDefaultNet) {
- return settings, nil
- }
- }
-
- // If not joining networks, we should have at most 1 result
- if len(netStatus) > 1 {
- return nil, fmt.Errorf("should have at most 1 network status result if not joining networks, instead got %d: %w", len(netStatus), define.ErrInternal)
- }
-
- if len(netStatus) == 1 {
- for _, status := range netStatus {
- settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(status)
- }
- }
- return settings, nil
-}
-
func (c *Container) joinedNetworkNSPath() string {
for _, namespace := range c.config.Spec.Linux.Namespaces {
if namespace.Type == specs.NetworkNamespace {
@@ -1151,49 +855,6 @@ func (c *Container) inspectJoinedNetworkNS(networkns string) (q types.StatusBloc
return result, err
}
-// resultToBasicNetworkConfig produces an InspectBasicNetworkConfig from a CNI
-// result
-func resultToBasicNetworkConfig(result types.StatusBlock) define.InspectBasicNetworkConfig {
- config := define.InspectBasicNetworkConfig{}
- interfaceNames := make([]string, 0, len(result.Interfaces))
- for interfaceName := range result.Interfaces {
- interfaceNames = append(interfaceNames, interfaceName)
- }
- // ensure consistent inspect results by sorting
- sort.Strings(interfaceNames)
- for _, interfaceName := range interfaceNames {
- netInt := result.Interfaces[interfaceName]
- for _, netAddress := range netInt.Subnets {
- size, _ := netAddress.IPNet.Mask.Size()
- if netAddress.IPNet.IP.To4() != nil {
- // ipv4
- if config.IPAddress == "" {
- config.IPAddress = netAddress.IPNet.IP.String()
- config.IPPrefixLen = size
- config.Gateway = netAddress.Gateway.String()
- } else {
- config.SecondaryIPAddresses = append(config.SecondaryIPAddresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size})
- }
- } else {
- // ipv6
- if config.GlobalIPv6Address == "" {
- config.GlobalIPv6Address = netAddress.IPNet.IP.String()
- config.GlobalIPv6PrefixLen = size
- config.IPv6Gateway = netAddress.Gateway.String()
- } else {
- config.SecondaryIPv6Addresses = append(config.SecondaryIPv6Addresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size})
- }
- }
- }
- if config.MacAddress == "" {
- config.MacAddress = netInt.MacAddress.String()
- } else {
- config.AdditionalMacAddresses = append(config.AdditionalMacAddresses, netInt.MacAddress.String())
- }
- }
- return config
-}
-
type logrusDebugWriter struct {
prefix string
}
@@ -1202,363 +863,3 @@ func (w *logrusDebugWriter) Write(p []byte) (int, error) {
logrus.Debugf("%s%s", w.prefix, string(p))
return len(p), nil
}
-
-// NetworkDisconnect removes a container from the network
-func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) error {
- // only the bridge mode supports cni networks
- if err := isBridgeNetMode(c.config.NetMode); err != nil {
- return err
- }
-
- c.lock.Lock()
- defer c.lock.Unlock()
-
- networks, err := c.networks()
- if err != nil {
- return err
- }
-
- // check if network exists and if the input is a ID we get the name
- // CNI only uses names so it is important that we only use the name
- netName, err = c.runtime.normalizeNetworkName(netName)
- if err != nil {
- return err
- }
-
- _, nameExists := networks[netName]
- if !nameExists && len(networks) > 0 {
- return fmt.Errorf("container %s is not connected to network %s", nameOrID, netName)
- }
-
- if err := c.syncContainer(); err != nil {
- return err
- }
- // get network status before we disconnect
- networkStatus := c.getNetworkStatus()
-
- if err := c.runtime.state.NetworkDisconnect(c, netName); err != nil {
- return err
- }
-
- c.newNetworkEvent(events.NetworkDisconnect, netName)
- if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
- return nil
- }
-
- if c.state.NetNS == nil {
- return fmt.Errorf("unable to disconnect %s from %s: %w", nameOrID, netName, define.ErrNoNetwork)
- }
-
- opts := types.NetworkOptions{
- ContainerID: c.config.ID,
- ContainerName: getCNIPodName(c),
- }
- opts.PortMappings = c.convertPortMappings()
- opts.Networks = map[string]types.PerNetworkOptions{
- netName: networks[netName],
- }
-
- if err := c.runtime.teardownNetwork(c.state.NetNS.Path(), opts); err != nil {
- return err
- }
-
- // update network status if container is running
- oldStatus, statusExist := networkStatus[netName]
- delete(networkStatus, netName)
- c.state.NetworkStatus = networkStatus
- err = c.save()
- if err != nil {
- return err
- }
-
- // Reload ports when there are still connected networks, maybe we removed the network interface with the child ip.
- // Reloading without connected networks does not make sense, so we can skip this step.
- if rootless.IsRootless() && len(networkStatus) > 0 {
- if err := c.reloadRootlessRLKPortMapping(); err != nil {
- return err
- }
- }
-
- // Update resolv.conf if required
- if statusExist {
- stringIPs := make([]string, 0, len(oldStatus.DNSServerIPs))
- for _, ip := range oldStatus.DNSServerIPs {
- stringIPs = append(stringIPs, ip.String())
- }
- if len(stringIPs) > 0 {
- logrus.Debugf("Removing DNS Servers %v from resolv.conf", stringIPs)
- if err := c.removeNameserver(stringIPs); err != nil {
- return err
- }
- }
-
- // update /etc/hosts file
- if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
- // sync the names with c.getHostsEntries()
- names := []string{c.Hostname(), c.config.Name}
- rm := etchosts.GetNetworkHostEntries(map[string]types.StatusBlock{netName: oldStatus}, names...)
- if len(rm) > 0 {
- // make sure to lock this file to prevent concurrent writes when
- // this is used a net dependency container
- lock, err := lockfile.GetLockfile(file)
- if err != nil {
- return fmt.Errorf("failed to lock hosts file: %w", err)
- }
- logrus.Debugf("Remove /etc/hosts entries %v", rm)
- lock.Lock()
- err = etchosts.Remove(file, rm)
- lock.Unlock()
- if err != nil {
- return err
- }
- }
- }
- }
- return nil
-}
-
-// ConnectNetwork connects a container to a given network
-func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
- // only the bridge mode supports cni networks
- if err := isBridgeNetMode(c.config.NetMode); err != nil {
- return err
- }
-
- c.lock.Lock()
- defer c.lock.Unlock()
-
- networks, err := c.networks()
- if err != nil {
- return err
- }
-
- // check if network exists and if the input is a ID we get the name
- // CNI only uses names so it is important that we only use the name
- netName, err = c.runtime.normalizeNetworkName(netName)
- if err != nil {
- return err
- }
-
- if err := c.syncContainer(); err != nil {
- return err
- }
-
- // get network status before we connect
- networkStatus := c.getNetworkStatus()
-
- // always add the short id as alias for docker compat
- netOpts.Aliases = append(netOpts.Aliases, c.config.ID[:12])
-
- if netOpts.InterfaceName == "" {
- netOpts.InterfaceName = getFreeInterfaceName(networks)
- if netOpts.InterfaceName == "" {
- return errors.New("could not find free network interface name")
- }
- }
-
- if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil {
- return err
- }
- c.newNetworkEvent(events.NetworkConnect, netName)
- if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
- return nil
- }
- if c.state.NetNS == nil {
- return fmt.Errorf("unable to connect %s to %s: %w", nameOrID, netName, define.ErrNoNetwork)
- }
-
- opts := types.NetworkOptions{
- ContainerID: c.config.ID,
- ContainerName: getCNIPodName(c),
- }
- opts.PortMappings = c.convertPortMappings()
- opts.Networks = map[string]types.PerNetworkOptions{
- netName: netOpts,
- }
-
- results, err := c.runtime.setUpNetwork(c.state.NetNS.Path(), opts)
- if err != nil {
- return err
- }
- if len(results) != 1 {
- return errors.New("when adding aliases, results must be of length 1")
- }
-
- // we need to get the old host entries before we add the new one to the status
- // if we do not add do it here we will get the wrong existing entries which will throw of the logic
- // we could also copy the map but this does not seem worth it
- // sync the hostNames with c.getHostsEntries()
- hostNames := []string{c.Hostname(), c.config.Name}
- oldHostEntries := etchosts.GetNetworkHostEntries(networkStatus, hostNames...)
-
- // update network status
- if networkStatus == nil {
- networkStatus = make(map[string]types.StatusBlock, 1)
- }
- networkStatus[netName] = results[netName]
- c.state.NetworkStatus = networkStatus
-
- err = c.save()
- if err != nil {
- return err
- }
-
- // The first network needs a port reload to set the correct child ip for the rootlessport process.
- // Adding a second network does not require a port reload because the child ip is still valid.
- if rootless.IsRootless() && len(networks) == 0 {
- if err := c.reloadRootlessRLKPortMapping(); err != nil {
- return err
- }
- }
-
- ipv6, err := c.checkForIPv6(networkStatus)
- if err != nil {
- return err
- }
-
- // Update resolv.conf if required
- stringIPs := make([]string, 0, len(results[netName].DNSServerIPs))
- for _, ip := range results[netName].DNSServerIPs {
- if (ip.To4() == nil) && !ipv6 {
- continue
- }
- stringIPs = append(stringIPs, ip.String())
- }
- if len(stringIPs) > 0 {
- logrus.Debugf("Adding DNS Servers %v to resolv.conf", stringIPs)
- if err := c.addNameserver(stringIPs); err != nil {
- return err
- }
- }
-
- // update /etc/hosts file
- if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
- // make sure to lock this file to prevent concurrent writes when
- // this is used a net dependency container
- lock, err := lockfile.GetLockfile(file)
- if err != nil {
- return fmt.Errorf("failed to lock hosts file: %w", err)
- }
- new := etchosts.GetNetworkHostEntries(results, hostNames...)
- logrus.Debugf("Add /etc/hosts entries %v", new)
- // use special AddIfExists API to make sure we only add new entries if an old one exists
- // see the AddIfExists() comment for more information
- lock.Lock()
- err = etchosts.AddIfExists(file, oldHostEntries, new)
- lock.Unlock()
- if err != nil {
- return err
- }
- }
-
- return nil
-}
-
-// get a free interface name for a new network
-// return an empty string if no free name was found
-func getFreeInterfaceName(networks map[string]types.PerNetworkOptions) string {
- ifNames := make([]string, 0, len(networks))
- for _, opts := range networks {
- ifNames = append(ifNames, opts.InterfaceName)
- }
- for i := 0; i < 100000; i++ {
- ifName := fmt.Sprintf("eth%d", i)
- if !util.StringInSlice(ifName, ifNames) {
- return ifName
- }
- }
- return ""
-}
-
-// DisconnectContainerFromNetwork removes a container from its CNI network
-func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error {
- ctr, err := r.LookupContainer(nameOrID)
- if err != nil {
- return err
- }
- return ctr.NetworkDisconnect(nameOrID, netName, force)
-}
-
-// ConnectContainerToNetwork connects a container to a CNI network
-func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
- ctr, err := r.LookupContainer(nameOrID)
- if err != nil {
- return err
- }
- return ctr.NetworkConnect(nameOrID, netName, netOpts)
-}
-
-// normalizeNetworkName takes a network name, a partial or a full network ID and returns the network name.
-// If the network is not found a errors is returned.
-func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) {
- net, err := r.network.NetworkInspect(nameOrID)
- if err != nil {
- return "", err
- }
- return net.Name, nil
-}
-
-// ocicniPortsToNetTypesPorts convert the old port format to the new one
-// while deduplicating ports into ranges
-func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping {
- if len(ports) == 0 {
- return nil
- }
-
- newPorts := make([]types.PortMapping, 0, len(ports))
-
- // first sort the ports
- sort.Slice(ports, func(i, j int) bool {
- return compareOCICNIPorts(ports[i], ports[j])
- })
-
- // we already check if the slice is empty so we can use the first element
- currentPort := types.PortMapping{
- HostIP: ports[0].HostIP,
- HostPort: uint16(ports[0].HostPort),
- ContainerPort: uint16(ports[0].ContainerPort),
- Protocol: ports[0].Protocol,
- Range: 1,
- }
-
- for i := 1; i < len(ports); i++ {
- if ports[i].HostIP == currentPort.HostIP &&
- ports[i].Protocol == currentPort.Protocol &&
- ports[i].HostPort-int32(currentPort.Range) == int32(currentPort.HostPort) &&
- ports[i].ContainerPort-int32(currentPort.Range) == int32(currentPort.ContainerPort) {
- currentPort.Range++
- } else {
- newPorts = append(newPorts, currentPort)
- currentPort = types.PortMapping{
- HostIP: ports[i].HostIP,
- HostPort: uint16(ports[i].HostPort),
- ContainerPort: uint16(ports[i].ContainerPort),
- Protocol: ports[i].Protocol,
- Range: 1,
- }
- }
- }
- newPorts = append(newPorts, currentPort)
- return newPorts
-}
-
-// compareOCICNIPorts will sort the ocicni ports by
-// 1) host ip
-// 2) protocol
-// 3) hostPort
-// 4) container port
-func compareOCICNIPorts(i, j types.OCICNIPortMapping) bool {
- if i.HostIP != j.HostIP {
- return i.HostIP < j.HostIP
- }
-
- if i.Protocol != j.Protocol {
- return i.Protocol < j.Protocol
- }
-
- if i.HostPort != j.HostPort {
- return i.HostPort < j.HostPort
- }
-
- return i.ContainerPort < j.ContainerPort
-}
diff --git a/libpod/networking_slirp4netns.go b/libpod/networking_slirp4netns.go
index 4a6462d46..d4ec9082b 100644
--- a/libpod/networking_slirp4netns.go
+++ b/libpod/networking_slirp4netns.go
@@ -243,7 +243,7 @@ func (r *Runtime) setupSlirp4netns(ctr *Container, netns ns.NetNS) error {
}
slirpFeatures, err := checkSlirpFlags(path)
if err != nil {
- return fmt.Errorf("error checking slirp4netns binary %s: %q: %w", path, err, err)
+ return fmt.Errorf("checking slirp4netns binary %s: %q: %w", path, err, err)
}
cmdArgs, err := createBasicSlirp4netnsCmdArgs(netOptions, slirpFeatures)
if err != nil {
@@ -405,7 +405,7 @@ func GetSlirp4netnsIP(subnet *net.IPNet) (*net.IP, error) {
}
expectedIP, err := addToIP(slirpSubnet, uint32(100))
if err != nil {
- return nil, fmt.Errorf("error calculating expected ip for slirp4netns: %w", err)
+ return nil, fmt.Errorf("calculating expected ip for slirp4netns: %w", err)
}
return expectedIP, nil
}
@@ -419,7 +419,7 @@ func GetSlirp4netnsGateway(subnet *net.IPNet) (*net.IP, error) {
}
expectedGatewayIP, err := addToIP(slirpSubnet, uint32(2))
if err != nil {
- return nil, fmt.Errorf("error calculating expected gateway ip for slirp4netns: %w", err)
+ return nil, fmt.Errorf("calculating expected gateway ip for slirp4netns: %w", err)
}
return expectedGatewayIP, nil
}
@@ -433,7 +433,7 @@ func GetSlirp4netnsDNS(subnet *net.IPNet) (*net.IP, error) {
}
expectedDNSIP, err := addToIP(slirpSubnet, uint32(3))
if err != nil {
- return nil, fmt.Errorf("error calculating expected dns ip for slirp4netns: %w", err)
+ return nil, fmt.Errorf("calculating expected dns ip for slirp4netns: %w", err)
}
return expectedDNSIP, nil
}
@@ -465,7 +465,7 @@ func waitForSync(syncR *os.File, cmd *exec.Cmd, logFile io.ReadSeeker, timeout t
b := make([]byte, 16)
for {
if err := syncR.SetDeadline(time.Now().Add(timeout)); err != nil {
- return fmt.Errorf("error setting %s pipe timeout: %w", prog, err)
+ return fmt.Errorf("setting %s pipe timeout: %w", prog, err)
}
// FIXME: return err as soon as proc exits, without waiting for timeout
if _, err := syncR.Read(b); err == nil {
@@ -676,7 +676,7 @@ func openSlirp4netnsPort(apiSocket, proto, hostip string, hostport, guestport ui
// successful.
var y map[string]interface{}
if err := json.Unmarshal(buf[0:readLength], &y); err != nil {
- return fmt.Errorf("error parsing error status from slirp4netns: %w", err)
+ return fmt.Errorf("parsing error status from slirp4netns: %w", err)
}
if e, found := y["error"]; found {
return fmt.Errorf("from slirp4netns while setting up port redirection: %v", e)
diff --git a/libpod/networking_unsupported.go b/libpod/networking_unsupported.go
new file mode 100644
index 000000000..e5a6d1456
--- /dev/null
+++ b/libpod/networking_unsupported.go
@@ -0,0 +1,91 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "errors"
+ "net"
+ "path/filepath"
+
+ "github.com/containers/common/libnetwork/types"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/storage/pkg/lockfile"
+)
+
+type RootlessNetNS struct {
+ dir string
+ Lock lockfile.Locker
+}
+
+// ocicniPortsToNetTypesPorts converts the old port format to the new one
+// while deduplicating ports into ranges
+func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping {
+ return []types.PortMapping{}
+}
+
+func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) {
+ return nil, errors.New("not implemented (*Container) getContainerNetworkInfo")
+}
+
+func (c *Container) setupRootlessNetwork() error {
+ return errors.New("not implemented (*Container) setupRootlessNetwork")
+}
+
+func (r *Runtime) setupNetNS(ctr *Container) error {
+ return errors.New("not implemented (*Runtime) setupNetNS")
+}
+
+// normalizeNetworkName takes a network name, a partial or a full network ID and returns the network name.
+// If the network is not found an error is returned.
+func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) {
+ return "", errors.New("not implemented (*Runtime) normalizeNetworkName")
+}
+
+// DisconnectContainerFromNetwork removes a container from its CNI network
+func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error {
+ return errors.New("not implemented (*Runtime) DisconnectContainerFromNetwork")
+}
+
+// ConnectContainerToNetwork connects a container to a CNI network
+func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
+ return errors.New("not implemented (*Runtime) ConnectContainerToNetwork")
+}
+
+// getPath will join the given path to the rootless netns dir
+func (r *RootlessNetNS) getPath(path string) string {
+ return filepath.Join(r.dir, path)
+}
+
+// Do - run the given function in the rootless netns.
+// It does not take the rootless netns lock; the caller
+// should only lock when needed, e.g. for cni operations.
+func (r *RootlessNetNS) Do(toRun func() error) error {
+ return errors.New("not implemented (*RootlessNetNS) Do")
+}
+
+// Cleanup the rootless network namespace if needed.
+// It checks if we have running containers with the bridge network mode.
+// Cleanup() expects that r.Lock is locked
+func (r *RootlessNetNS) Cleanup(runtime *Runtime) error {
+ return errors.New("not implemented (*RootlessNetNS) Cleanup")
+}
+
+// GetRootlessNetNs returns the rootless netns object. If new is set to true,
+// the rootless network namespace will be created if it does not exist already.
+// If called as root it always returns nil.
+// On success the returned RootlessNetNS lock is locked and must be unlocked by the caller.
+func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
+ return nil, errors.New("not implemented (*Runtime) GetRootlessNetNs")
+}
+
+// convertPortMappings will remove the HostIP part from the ports when running inside podman machine.
+// This is needed because a HostIP of 127.0.0.1 would not allow the gvproxy forwarder to reach the open ports.
+// For machine the HostIP must only be used by gvproxy and never in the VM.
+func (c *Container) convertPortMappings() []types.PortMapping {
+ return []types.PortMapping{}
+}
+
+func GetSlirp4netnsIP(subnet *net.IPNet) (*net.IP, error) {
+ return nil, errors.New("not implemented GetSlirp4netnsIP")
+}
diff --git a/libpod/oci.go b/libpod/oci.go
index 70053db1b..e5b9a0dcd 100644
--- a/libpod/oci.go
+++ b/libpod/oci.go
@@ -5,6 +5,7 @@ import (
"github.com/containers/common/pkg/resize"
"github.com/containers/podman/v4/libpod/define"
+ "github.com/opencontainers/runtime-spec/specs-go"
)
// OCIRuntime is an implementation of an OCI runtime.
@@ -148,6 +149,9 @@ type OCIRuntime interface {
// RuntimeInfo returns verbose information about the runtime.
RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error)
+
+ // UpdateContainer updates the given container's cgroup configuration.
+ UpdateContainer(ctr *Container, res *specs.LinuxResources) error
}
// AttachOptions are options used when attached to a container or an exec
diff --git a/libpod/oci_conmon_attach_common.go b/libpod/oci_conmon_attach_common.go
new file mode 100644
index 000000000..dec749837
--- /dev/null
+++ b/libpod/oci_conmon_attach_common.go
@@ -0,0 +1,305 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "os"
+ "path/filepath"
+ "syscall"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+ "github.com/containers/common/pkg/util"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/errorhandling"
+ "github.com/moby/term"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+/* Sync with stdpipe_t in conmon.c */
+const (
+ AttachPipeStdin = 1
+ AttachPipeStdout = 2
+ AttachPipeStderr = 3
+)
+
+// Attach to the given container.
+// Does not check if state is appropriate.
+// params.Started is only required if params.Start is true.
+func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
+ passthrough := c.LogDriver() == define.PassthroughLogging
+
+ if params == nil || params.Streams == nil {
+ return fmt.Errorf("must provide parameters to Attach: %w", define.ErrInternal)
+ }
+
+ if !params.Streams.AttachOutput && !params.Streams.AttachError && !params.Streams.AttachInput && !passthrough {
+ return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
+ }
+ if params.Start && params.Started == nil {
+ return fmt.Errorf("started chan not passed when startContainer set: %w", define.ErrInternal)
+ }
+
+ keys := config.DefaultDetachKeys
+ if params.DetachKeys != nil {
+ keys = *params.DetachKeys
+ }
+
+ detachKeys, err := processDetachKeys(keys)
+ if err != nil {
+ return err
+ }
+
+ var conn *net.UnixConn
+ if !passthrough {
+ logrus.Debugf("Attaching to container %s", c.ID())
+
+ // If we have a resize, do it.
+ if params.InitialSize != nil {
+ if err := r.AttachResize(c, *params.InitialSize); err != nil {
+ return err
+ }
+ }
+
+ attachSock, err := c.AttachSocketPath()
+ if err != nil {
+ return err
+ }
+
+ conn, err = openUnixSocket(attachSock)
+ if err != nil {
+ return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
+ }
+ defer func() {
+ if err := conn.Close(); err != nil {
+ logrus.Errorf("unable to close socket: %q", err)
+ }
+ }()
+ }
+
+ // If starting was requested, start the container and notify when that's
+ // done.
+ if params.Start {
+ if err := c.start(); err != nil {
+ return err
+ }
+ params.Started <- true
+ }
+
+ if passthrough {
+ return nil
+ }
+
+ receiveStdoutError, stdinDone := setupStdioChannels(params.Streams, conn, detachKeys)
+ if params.AttachReady != nil {
+ params.AttachReady <- true
+ }
+ return readStdio(conn, params.Streams, receiveStdoutError, stdinDone)
+}
+
+// Attach to the given container's exec session
+// attachFd and startFd must be open file descriptors
+// attachFd must be the output side of the fd. attachFd is used for two things:
+// conmon will first send a nonce value across the pipe indicating it has set up its side of the console socket
+// this ensures attachToExec gets all of the output of the called process
+// conmon will then send the exit code of the exec process, or an error in the exec session
+// startFd must be the input side of the fd.
+// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty
+// conmon will wait to start the exec session until the parent process has set up the console socket.
+// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec
+// will read from the output side of start fd, thus learning to start the child process.
+// Thus, the order goes as follows:
+// 1. conmon parent process sets up its console socket. sends on attachFd
+// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty
+// 3. child waits on startFd for attachToExec to attach to said console socket
+// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go
+// 5. child receives on startFd, runs the runtime exec command
+// attachToExec is responsible for closing startFd and attachFd
+func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *resize.TerminalSize) error {
+ if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput {
+ return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
+ }
+ if startFd == nil || attachFd == nil {
+ return fmt.Errorf("start sync pipe and attach sync pipe must be defined for exec attach: %w", define.ErrInvalidArg)
+ }
+
+ defer errorhandling.CloseQuiet(startFd)
+ defer errorhandling.CloseQuiet(attachFd)
+
+ detachString := config.DefaultDetachKeys
+ if keys != nil {
+ detachString = *keys
+ }
+ detachKeys, err := processDetachKeys(detachString)
+ if err != nil {
+ return err
+ }
+
+ logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
+
+ // set up the socket path, such that it is the correct length and location for exec
+ sockPath, err := c.execAttachSocketPath(sessionID)
+ if err != nil {
+ return err
+ }
+
+ // 2: read from attachFd that the parent process has set up the console socket
+ if _, err := readConmonPipeData(c.ociRuntime.Name(), attachFd, ""); err != nil {
+ return err
+ }
+
+ // resize before we start the container process
+ if newSize != nil {
+ err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
+ if err != nil {
+ logrus.Warnf("Resize failed: %v", err)
+ }
+ }
+
+ // 2: then attach
+ conn, err := openUnixSocket(sockPath)
+ if err != nil {
+ return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err)
+ }
+ defer func() {
+ if err := conn.Close(); err != nil {
+ logrus.Errorf("Unable to close socket: %q", err)
+ }
+ }()
+
+ // start listening on stdio of the process
+ receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys)
+
+ // 4: send start message to child
+ if err := writeConmonPipeData(startFd); err != nil {
+ return err
+ }
+
+ return readStdio(conn, streams, receiveStdoutError, stdinDone)
+}
+
+func processDetachKeys(keys string) ([]byte, error) {
+ // Check the validity of the provided keys first
+ if len(keys) == 0 {
+ return []byte{}, nil
+ }
+ detachKeys, err := term.ToBytes(keys)
+ if err != nil {
+ return nil, fmt.Errorf("invalid detach keys: %w", err)
+ }
+ return detachKeys, nil
+}
+
+func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) {
+ resize.HandleResizing(r, func(size resize.TerminalSize) {
+ controlPath := filepath.Join(bundlePath, "ctl")
+ controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0)
+ if err != nil {
+ logrus.Debugf("Could not open ctl file: %v", err)
+ return
+ }
+ defer controlFile.Close()
+
+ logrus.Debugf("Received a resize event: %+v", size)
+ if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil {
+ logrus.Warnf("Failed to write to control file to resize terminal: %v", err)
+ }
+ })
+}
+
+func setupStdioChannels(streams *define.AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) {
+ receiveStdoutError := make(chan error)
+ go func() {
+ receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn)
+ }()
+
+ stdinDone := make(chan error)
+ go func() {
+ var err error
+ if streams.AttachInput {
+ _, err = util.CopyDetachable(conn, streams.InputStream, detachKeys)
+ }
+ stdinDone <- err
+ }()
+
+ return receiveStdoutError, stdinDone
+}
+
+func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error {
+ var err error
+ buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */
+ for {
+ nr, er := conn.Read(buf)
+ if nr > 0 {
+ var dst io.Writer
+ var doWrite bool
+ switch buf[0] {
+ case AttachPipeStdout:
+ dst = outputStream
+ doWrite = writeOutput
+ case AttachPipeStderr:
+ dst = errorStream
+ doWrite = writeError
+ default:
+ logrus.Infof("Received unexpected attach type %+d", buf[0])
+ }
+ if dst == nil {
+ return errors.New("output destination cannot be nil")
+ }
+
+ if doWrite {
+ nw, ew := dst.Write(buf[1:nr])
+ if ew != nil {
+ err = ew
+ break
+ }
+ if nr != nw+1 {
+ err = io.ErrShortWrite
+ break
+ }
+ }
+ }
+ if errors.Is(er, io.EOF) || errors.Is(er, syscall.ECONNRESET) {
+ break
+ }
+ if er != nil {
+ err = er
+ break
+ }
+ }
+ return err
+}
+
+func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutError, stdinDone chan error) error {
+ var err error
+ select {
+ case err = <-receiveStdoutError:
+ if err := socketCloseWrite(conn); err != nil {
+ logrus.Errorf("Failed to close stdin: %v", err)
+ }
+ return err
+ case err = <-stdinDone:
+ if err == define.ErrDetach {
+ if err := socketCloseWrite(conn); err != nil {
+ logrus.Errorf("Failed to close stdin: %v", err)
+ }
+ return err
+ }
+ if err == nil {
+ // copy stdin is done, close it
+ if connErr := socketCloseWrite(conn); connErr != nil {
+ logrus.Errorf("Unable to close conn: %v", connErr)
+ }
+ }
+ if streams.AttachOutput || streams.AttachError {
+ return <-receiveStdoutError
+ }
+ }
+ return nil
+}
diff --git a/libpod/oci_conmon_attach_freebsd.go b/libpod/oci_conmon_attach_freebsd.go
new file mode 100644
index 000000000..de0054381
--- /dev/null
+++ b/libpod/oci_conmon_attach_freebsd.go
@@ -0,0 +1,21 @@
+package libpod
+
+import (
+ "net"
+ "os"
+ "path/filepath"
+)
+
+func openUnixSocket(path string) (*net.UnixConn, error) {
+ // socket paths can be too long to fit into a sockaddr_un so we create a shorter symlink.
+ tmpdir, err := os.MkdirTemp("", "podman")
+ if err != nil {
+ return nil, err
+ }
+ defer os.RemoveAll(tmpdir)
+ tmpsockpath := filepath.Join(tmpdir, "sock")
+ if err := os.Symlink(path, tmpsockpath); err != nil {
+ return nil, err
+ }
+ return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: tmpsockpath, Net: "unixpacket"})
+}
diff --git a/libpod/oci_conmon_attach_linux.go b/libpod/oci_conmon_attach_linux.go
index aa55aa6f5..f1aa89d3e 100644
--- a/libpod/oci_conmon_attach_linux.go
+++ b/libpod/oci_conmon_attach_linux.go
@@ -1,34 +1,12 @@
-//go:build linux
-// +build linux
-
package libpod
import (
- "errors"
"fmt"
- "io"
"net"
- "os"
- "path/filepath"
- "syscall"
- "github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/resize"
- "github.com/containers/common/pkg/util"
- "github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/pkg/errorhandling"
- "github.com/moby/term"
- "github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
-/* Sync with stdpipe_t in conmon.c */
-const (
- AttachPipeStdin = 1
- AttachPipeStdout = 2
- AttachPipeStderr = 3
-)
-
func openUnixSocket(path string) (*net.UnixConn, error) {
fd, err := unix.Open(path, unix.O_PATH, 0)
if err != nil {
@@ -37,278 +15,3 @@ func openUnixSocket(path string) (*net.UnixConn, error) {
defer unix.Close(fd)
return net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d", fd), Net: "unixpacket"})
}
-
-// Attach to the given container.
-// Does not check if state is appropriate.
-// started is only required if startContainer is true.
-func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
- passthrough := c.LogDriver() == define.PassthroughLogging
-
- if params == nil || params.Streams == nil {
- return fmt.Errorf("must provide parameters to Attach: %w", define.ErrInternal)
- }
-
- if !params.Streams.AttachOutput && !params.Streams.AttachError && !params.Streams.AttachInput && !passthrough {
- return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- if params.Start && params.Started == nil {
- return fmt.Errorf("started chan not passed when startContainer set: %w", define.ErrInternal)
- }
-
- keys := config.DefaultDetachKeys
- if params.DetachKeys != nil {
- keys = *params.DetachKeys
- }
-
- detachKeys, err := processDetachKeys(keys)
- if err != nil {
- return err
- }
-
- var conn *net.UnixConn
- if !passthrough {
- logrus.Debugf("Attaching to container %s", c.ID())
-
- // If we have a resize, do it.
- if params.InitialSize != nil {
- if err := r.AttachResize(c, *params.InitialSize); err != nil {
- return err
- }
- }
-
- attachSock, err := c.AttachSocketPath()
- if err != nil {
- return err
- }
-
- conn, err = openUnixSocket(attachSock)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
- }
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("unable to close socket: %q", err)
- }
- }()
- }
-
- // If starting was requested, start the container and notify when that's
- // done.
- if params.Start {
- if err := c.start(); err != nil {
- return err
- }
- params.Started <- true
- }
-
- if passthrough {
- return nil
- }
-
- receiveStdoutError, stdinDone := setupStdioChannels(params.Streams, conn, detachKeys)
- if params.AttachReady != nil {
- params.AttachReady <- true
- }
- return readStdio(conn, params.Streams, receiveStdoutError, stdinDone)
-}
-
-// Attach to the given container's exec session
-// attachFd and startFd must be open file descriptors
-// attachFd must be the output side of the fd. attachFd is used for two things:
-// conmon will first send a nonce value across the pipe indicating it has set up its side of the console socket
-// this ensures attachToExec gets all of the output of the called process
-// conmon will then send the exit code of the exec process, or an error in the exec session
-// startFd must be the input side of the fd.
-// newSize resizes the tty to this size before the process is started, must be nil if the exec session has no tty
-// conmon will wait to start the exec session until the parent process has set up the console socket.
-// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec
-// will read from the output side of start fd, thus learning to start the child process.
-// Thus, the order goes as follow:
-// 1. conmon parent process sets up its console socket. sends on attachFd
-// 2. attachToExec attaches to the console socket after reading on attachFd and resizes the tty
-// 3. child waits on startFd for attachToExec to attach to said console socket
-// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go
-// 5. child receives on startFd, runs the runtime exec command
-// attachToExec is responsible for closing startFd and attachFd
-func (c *Container) attachToExec(streams *define.AttachStreams, keys *string, sessionID string, startFd, attachFd *os.File, newSize *resize.TerminalSize) error {
- if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput {
- return fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- if startFd == nil || attachFd == nil {
- return fmt.Errorf("start sync pipe and attach sync pipe must be defined for exec attach: %w", define.ErrInvalidArg)
- }
-
- defer errorhandling.CloseQuiet(startFd)
- defer errorhandling.CloseQuiet(attachFd)
-
- detachString := config.DefaultDetachKeys
- if keys != nil {
- detachString = *keys
- }
- detachKeys, err := processDetachKeys(detachString)
- if err != nil {
- return err
- }
-
- logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
-
- // set up the socket path, such that it is the correct length and location for exec
- sockPath, err := c.execAttachSocketPath(sessionID)
- if err != nil {
- return err
- }
-
- // 2: read from attachFd that the parent process has set up the console socket
- if _, err := readConmonPipeData(c.ociRuntime.Name(), attachFd, ""); err != nil {
- return err
- }
-
- // resize before we start the container process
- if newSize != nil {
- err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
- if err != nil {
- logrus.Warnf("Resize failed: %v", err)
- }
- }
-
- // 2: then attach
- conn, err := openUnixSocket(sockPath)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err)
- }
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("Unable to close socket: %q", err)
- }
- }()
-
- // start listening on stdio of the process
- receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys)
-
- // 4: send start message to child
- if err := writeConmonPipeData(startFd); err != nil {
- return err
- }
-
- return readStdio(conn, streams, receiveStdoutError, stdinDone)
-}
-
-func processDetachKeys(keys string) ([]byte, error) {
- // Check the validity of the provided keys first
- if len(keys) == 0 {
- return []byte{}, nil
- }
- detachKeys, err := term.ToBytes(keys)
- if err != nil {
- return nil, fmt.Errorf("invalid detach keys: %w", err)
- }
- return detachKeys, nil
-}
-
-func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) {
- resize.HandleResizing(r, func(size resize.TerminalSize) {
- controlPath := filepath.Join(bundlePath, "ctl")
- controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0)
- if err != nil {
- logrus.Debugf("Could not open ctl file: %v", err)
- return
- }
- defer controlFile.Close()
-
- logrus.Debugf("Received a resize event: %+v", size)
- if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil {
- logrus.Warnf("Failed to write to control file to resize terminal: %v", err)
- }
- })
-}
-
-func setupStdioChannels(streams *define.AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) {
- receiveStdoutError := make(chan error)
- go func() {
- receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn)
- }()
-
- stdinDone := make(chan error)
- go func() {
- var err error
- if streams.AttachInput {
- _, err = util.CopyDetachable(conn, streams.InputStream, detachKeys)
- }
- stdinDone <- err
- }()
-
- return receiveStdoutError, stdinDone
-}
-
-func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error {
- var err error
- buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */
- for {
- nr, er := conn.Read(buf)
- if nr > 0 {
- var dst io.Writer
- var doWrite bool
- switch buf[0] {
- case AttachPipeStdout:
- dst = outputStream
- doWrite = writeOutput
- case AttachPipeStderr:
- dst = errorStream
- doWrite = writeError
- default:
- logrus.Infof("Received unexpected attach type %+d", buf[0])
- }
- if dst == nil {
- return errors.New("output destination cannot be nil")
- }
-
- if doWrite {
- nw, ew := dst.Write(buf[1:nr])
- if ew != nil {
- err = ew
- break
- }
- if nr != nw+1 {
- err = io.ErrShortWrite
- break
- }
- }
- }
- if errors.Is(er, io.EOF) || errors.Is(er, syscall.ECONNRESET) {
- break
- }
- if er != nil {
- err = er
- break
- }
- }
- return err
-}
-
-func readStdio(conn *net.UnixConn, streams *define.AttachStreams, receiveStdoutError, stdinDone chan error) error {
- var err error
- select {
- case err = <-receiveStdoutError:
- if err := conn.CloseWrite(); err != nil {
- logrus.Errorf("Failed to close stdin: %v", err)
- }
- return err
- case err = <-stdinDone:
- if err == define.ErrDetach {
- if err := conn.CloseWrite(); err != nil {
- logrus.Errorf("Failed to close stdin: %v", err)
- }
- return err
- }
- if err == nil {
- // copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
- logrus.Errorf("Unable to close conn: %v", connErr)
- }
- }
- if streams.AttachOutput || streams.AttachError {
- return <-receiveStdoutError
- }
- }
- return nil
-}
diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go
new file mode 100644
index 000000000..53dddd064
--- /dev/null
+++ b/libpod/oci_conmon_common.go
@@ -0,0 +1,1650 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+ "syscall"
+ "text/template"
+ "time"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+ cutil "github.com/containers/common/pkg/util"
+ conmonConfig "github.com/containers/conmon/runner/config"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/libpod/logs"
+ "github.com/containers/podman/v4/pkg/checkpoint/crutils"
+ "github.com/containers/podman/v4/pkg/errorhandling"
+ "github.com/containers/podman/v4/pkg/rootless"
+ "github.com/containers/podman/v4/pkg/specgenutil"
+ "github.com/containers/podman/v4/pkg/util"
+ "github.com/containers/podman/v4/utils"
+ "github.com/containers/storage/pkg/homedir"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+const (
+ // This is Conmon's STDIO_BUF_SIZE. I don't believe we have access to it
+ // directly from the Go code, so const it here
+ // Important: The conmon attach socket uses an extra byte at the beginning of each
+ // message to specify the STREAM so we have to increase the buffer size by one
+ bufferSize = conmonConfig.BufSize + 1
+)
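+
+// As a rough sketch of the protocol this implies (illustrative only, not a
+// helper used elsewhere): a reader pulls up to bufferSize bytes, treats the
+// first byte as the stream identifier, and the rest as payload.
+//
+//	buf := make([]byte, bufferSize)
+//	n, err := conn.Read(buf)
+//	if err == nil && n > 0 {
+//		payload := buf[1:n]
+//		switch buf[0] {
+//		case AttachPipeStdout:
+//			os.Stdout.Write(payload) // forward to our stdout
+//		case AttachPipeStderr:
+//			os.Stderr.Write(payload) // forward to our stderr
+//		}
+//	}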
+
+// ConmonOCIRuntime is an OCI runtime managed by Conmon.
+// TODO: Make all calls to OCI runtime have a timeout.
+type ConmonOCIRuntime struct {
+ name string
+ path string
+ conmonPath string
+ conmonEnv []string
+ tmpDir string
+ exitsDir string
+ logSizeMax int64
+ noPivot bool
+ reservePorts bool
+ runtimeFlags []string
+ supportsJSON bool
+ supportsKVM bool
+ supportsNoCgroups bool
+ enableKeyring bool
+}
+
+// Make a new Conmon-based OCI runtime with the given options.
+// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or
+// any runtime with a runc-compatible CLI.
+// The first path that points to a valid executable will be used.
+// Deliberately private. Someone should not be able to construct this outside of
+// libpod.
+func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) {
+ if name == "" {
+ return nil, fmt.Errorf("the OCI runtime must be provided a non-empty name: %w", define.ErrInvalidArg)
+ }
+
+ // Make lookup tables for runtime support
+ supportsJSON := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsJSON))
+ supportsNoCgroups := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsNoCgroups))
+ supportsKVM := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsKVM))
+ for _, r := range runtimeCfg.Engine.RuntimeSupportsJSON {
+ supportsJSON[r] = true
+ }
+ for _, r := range runtimeCfg.Engine.RuntimeSupportsNoCgroups {
+ supportsNoCgroups[r] = true
+ }
+ for _, r := range runtimeCfg.Engine.RuntimeSupportsKVM {
+ supportsKVM[r] = true
+ }
+
+ runtime := new(ConmonOCIRuntime)
+ runtime.name = name
+ runtime.conmonPath = conmonPath
+ runtime.runtimeFlags = runtimeFlags
+
+ runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars
+ runtime.tmpDir = runtimeCfg.Engine.TmpDir
+ runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax
+ runtime.noPivot = runtimeCfg.Engine.NoPivotRoot
+ runtime.reservePorts = runtimeCfg.Engine.EnablePortReservation
+ runtime.enableKeyring = runtimeCfg.Containers.EnableKeyring
+
+ // TODO: probe OCI runtime for feature and enable automatically if
+ // available.
+
+ base := filepath.Base(name)
+ runtime.supportsJSON = supportsJSON[base]
+ runtime.supportsNoCgroups = supportsNoCgroups[base]
+ runtime.supportsKVM = supportsKVM[base]
+
+ foundPath := false
+ for _, path := range paths {
+ stat, err := os.Stat(path)
+ if err != nil {
+ if os.IsNotExist(err) {
+ continue
+ }
+ return nil, fmt.Errorf("cannot stat OCI runtime %s path: %w", name, err)
+ }
+ if !stat.Mode().IsRegular() {
+ continue
+ }
+ foundPath = true
+ logrus.Tracef("found runtime %q", path)
+ runtime.path = path
+ break
+ }
+
+ // Search the $PATH as a last fallback
+ if !foundPath {
+ if foundRuntime, err := exec.LookPath(name); err == nil {
+ foundPath = true
+ runtime.path = foundRuntime
+ logrus.Debugf("using runtime %q from $PATH: %q", name, foundRuntime)
+ }
+ }
+
+ if !foundPath {
+ return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg)
+ }
+
+ runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
+
+ // Create the exit files directory
+ if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
+ // The directory is allowed to exist
+ if !os.IsExist(err) {
+ return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
+ }
+ }
+ return runtime, nil
+}
+
+// Name returns the name of the runtime being wrapped by Conmon.
+func (r *ConmonOCIRuntime) Name() string {
+ return r.name
+}
+
+// Path returns the path of the OCI runtime being wrapped by Conmon.
+func (r *ConmonOCIRuntime) Path() string {
+ return r.path
+}
+
+// hasCurrentUserMapped checks whether the current user is mapped inside the container user namespace
+func hasCurrentUserMapped(ctr *Container) bool {
+ if len(ctr.config.IDMappings.UIDMap) == 0 && len(ctr.config.IDMappings.GIDMap) == 0 {
+ return true
+ }
+ uid := os.Geteuid()
+ for _, m := range ctr.config.IDMappings.UIDMap {
+ if uid >= m.HostID && uid < m.HostID+m.Size {
+ return true
+ }
+ }
+ return false
+}
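+
+// For example, with a single UID mapping of HostID 100000 and Size 65536
+// (host UIDs 100000-165535 mapped into the container), a Podman process
+// running as host UID 1000 is not mapped, and CreateContainer below must
+// first make the relevant directories accessible to the container's root.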
+
+// CreateContainer creates a container.
+func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ // always make the run dir accessible to the current user so that the PID files can be read without
+ // being in the rootless user namespace.
+ if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil {
+ return 0, err
+ }
+ if !hasCurrentUserMapped(ctr) {
+ for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} {
+ if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
+ return 0, err
+ }
+ }
+
+ // if we are running a non-privileged container, be sure to unmount some kernel paths so they are not
+ // bind mounted inside the container at all.
+ if !ctr.config.Privileged && !rootless.IsRootless() {
+ return r.createRootlessContainer(ctr, restoreOptions)
+ }
+ }
+ return r.createOCIContainer(ctr, restoreOptions)
+}
+
+// UpdateContainerStatus retrieves the current status of the container from the
+// runtime. It updates the container's state but does not save it.
+func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return err
+ }
+
+ // Store old state so we know if we were already stopped
+ oldState := ctr.state.State
+
+ state := new(spec.State)
+
+ cmd := exec.Command(r.path, "state", ctr.ID())
+ cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
+
+ outPipe, err := cmd.StdoutPipe()
+ if err != nil {
+ return fmt.Errorf("getting stdout pipe: %w", err)
+ }
+ errPipe, err := cmd.StderrPipe()
+ if err != nil {
+ return fmt.Errorf("getting stderr pipe: %w", err)
+ }
+
+ if err := cmd.Start(); err != nil {
+ out, err2 := ioutil.ReadAll(errPipe)
+ if err2 != nil {
+ return fmt.Errorf("getting container %s state: %w", ctr.ID(), err)
+ }
+ if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") {
+ if err := ctr.removeConmonFiles(); err != nil {
+ logrus.Debugf("unable to remove conmon files for container %s", ctr.ID())
+ }
+ ctr.state.ExitCode = -1
+ ctr.state.FinishedTime = time.Now()
+ ctr.state.State = define.ContainerStateExited
+ return ctr.runtime.state.AddContainerExitCode(ctr.ID(), ctr.state.ExitCode)
+ }
+ return fmt.Errorf("getting container %s state. stderr/out: %s: %w", ctr.ID(), out, err)
+ }
+ defer func() {
+ _ = cmd.Wait()
+ }()
+
+ if err := errPipe.Close(); err != nil {
+ return err
+ }
+ out, err := ioutil.ReadAll(outPipe)
+ if err != nil {
+ return fmt.Errorf("reading stdout: %s: %w", ctr.ID(), err)
+ }
+ if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil {
+ return fmt.Errorf("decoding container status for container %s: %w", ctr.ID(), err)
+ }
+ ctr.state.PID = state.Pid
+
+ switch state.Status {
+ case "created":
+ ctr.state.State = define.ContainerStateCreated
+ case "paused":
+ ctr.state.State = define.ContainerStatePaused
+ case "running":
+ ctr.state.State = define.ContainerStateRunning
+ case "stopped":
+ ctr.state.State = define.ContainerStateStopped
+ default:
+ return fmt.Errorf("unrecognized status returned by runtime for container %s: %s: %w",
+ ctr.ID(), state.Status, define.ErrInternal)
+ }
+
+ // Handle ContainerStateStopping - keep it unless the container
+ // transitioned to no longer running.
+ if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) {
+ ctr.state.State = define.ContainerStateStopping
+ }
+
+ return nil
+}
+
+// StartContainer starts the given container.
+// Sets the time the container was started, but does not save it.
+func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
+ // TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ if path, ok := os.LookupEnv("PATH"); ok {
+ env = append(env, fmt.Sprintf("PATH=%s", path))
+ }
+ if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "start", ctr.ID())...); err != nil {
+ return err
+ }
+
+ ctr.state.StartedTime = time.Now()
+
+ return nil
+}
+
+// UpdateContainer updates the given container's cgroup configuration
+func (r *ConmonOCIRuntime) UpdateContainer(ctr *Container, resources *spec.LinuxResources) error {
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ if path, ok := os.LookupEnv("PATH"); ok {
+ env = append(env, fmt.Sprintf("PATH=%s", path))
+ }
+ args := r.runtimeFlags
+ args = append(args, "update")
+ tempFile, additionalArgs, err := generateResourceFile(resources)
+ if err != nil {
+ return err
+ }
+ defer os.Remove(tempFile)
+
+ args = append(args, additionalArgs...)
+ return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(args, ctr.ID())...)
+}
+
+func generateResourceFile(res *spec.LinuxResources) (string, []string, error) {
+ flags := []string{}
+ if res == nil {
+ return "", flags, nil
+ }
+
+ f, err := ioutil.TempFile("", "podman")
+ if err != nil {
+ return "", nil, err
+ }
+
+ j, err := json.Marshal(res)
+ if err != nil {
+ return "", nil, err
+ }
+ _, err = f.WriteString(string(j))
+ if err != nil {
+ return "", nil, err
+ }
+
+ flags = append(flags, "--resources="+f.Name())
+ return f.Name(), flags, nil
+}
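+
+// The file generateResourceFile writes is simply spec.LinuxResources
+// marshaled as JSON; for instance, a 512MiB memory limit serializes
+// (per the runtime-spec JSON tags) as:
+//
+//	{"memory":{"limit":536870912}}
+//
+// and UpdateContainer above then invokes, e.g.,
+// `crun update --resources=/tmp/podman123456 <ctrID>` with that file.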
+
+// KillContainer sends the given signal to the given container.
+// If all is set, send to all PIDs in the container.
+// All is only supported if the container created cgroups.
+func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
+ logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID())
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ var args []string
+ args = append(args, r.runtimeFlags...)
+ if all {
+ args = append(args, "kill", "--all", ctr.ID(), fmt.Sprintf("%d", signal))
+ } else {
+ args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal))
+ }
+ if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
+ // Update container state - there's a chance we failed because
+ // the container exited in the meantime.
+ if err2 := r.UpdateContainerStatus(ctr); err2 != nil {
+ logrus.Infof("Error updating status for container %s: %v", ctr.ID(), err2)
+ }
+ if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
+ return define.ErrCtrStateInvalid
+ }
+ return fmt.Errorf("sending signal to container %s: %w", ctr.ID(), err)
+ }
+
+ return nil
+}
+
+// StopContainer stops a container, first using its given stop signal (or
+// SIGTERM if no signal was specified), then using SIGKILL.
+// Timeout is given in seconds. If timeout is 0, the container will be
+// immediately killed with SIGKILL.
+// Does not set the container's finished time; assumes you will run updateStatus
+// afterwards to pull the exit code.
+func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
+ logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
+
+ // Ping the container to see if it's alive
+ // If it's not, it's already stopped, return
+ err := unix.Kill(ctr.state.PID, 0)
+ if err == unix.ESRCH {
+ return nil
+ }
+
+ stopSignal := ctr.config.StopSignal
+ if stopSignal == 0 {
+ stopSignal = uint(syscall.SIGTERM)
+ }
+
+ if timeout > 0 {
+ if err := r.KillContainer(ctr, stopSignal, all); err != nil {
+ // Is the container gone?
+ // If so, it probably died between the first check and
+ // our sending the signal
+ // The container is stopped, so exit cleanly
+ err := unix.Kill(ctr.state.PID, 0)
+ if err == unix.ESRCH {
+ return nil
+ }
+
+ return err
+ }
+
+ if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
+ logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err)
+ logrus.Warnf("StopSignal %s failed to stop container %s in %d seconds, resorting to SIGKILL", unix.SignalName(syscall.Signal(stopSignal)), ctr.Name(), timeout)
+ } else {
+ // No error, the container is dead
+ return nil
+ }
+ }
+
+ if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil {
+ // Again, check if the container is gone. If it is, exit cleanly.
+ if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
+ return nil
+ }
+ return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
+ }
+
+ // Give runtime a few seconds to make it happen
+ if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// DeleteContainer deletes a container from the OCI runtime.
+func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error {
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "delete", "--force", ctr.ID())...)
+}
+
+// PauseContainer pauses the given container.
+func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error {
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "pause", ctr.ID())...)
+}
+
+// UnpauseContainer unpauses the given container.
+func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "resume", ctr.ID())...)
+}
+
+// This filters out ENOTCONN errors which can happen on FreeBSD if the
+// other side of the connection is already closed.
+func socketCloseWrite(conn *net.UnixConn) error {
+ err := conn.CloseWrite()
+ if err != nil && errors.Is(err, syscall.ENOTCONN) {
+ return nil
+ }
+ return err
+}
+
+// HTTPAttach performs an attach for the HTTP API.
+// The caller must handle closing the HTTP connection after this returns.
+// The cancel channel is not closed; it is up to the caller to do so after
+// this function returns.
+// If this is a container with a terminal, we will stream raw. If it is not, we
+// will stream with an 8-byte header to multiplex STDOUT and STDERR.
+// Returns any errors that occurred, and whether the connection was successfully
+// hijacked before that error occurred.
+func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, streamAttach, streamLogs bool) (deferredErr error) {
+ isTerminal := false
+ if ctr.config.Spec.Process != nil {
+ isTerminal = ctr.config.Spec.Process.Terminal
+ }
+
+ if streams != nil {
+ if !streams.Stdin && !streams.Stdout && !streams.Stderr {
+ return fmt.Errorf("must specify at least one stream to attach to: %w", define.ErrInvalidArg)
+ }
+ }
+
+ attachSock, err := r.AttachSocketPath(ctr)
+ if err != nil {
+ return err
+ }
+
+ var conn *net.UnixConn
+ if streamAttach {
+ newConn, err := openUnixSocket(attachSock)
+ if err != nil {
+ return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
+ }
+ conn = newConn
+ defer func() {
+ if err := conn.Close(); err != nil {
+ logrus.Errorf("Unable to close container %s attach socket: %q", ctr.ID(), err)
+ }
+ }()
+
+ logrus.Debugf("Successfully connected to container %s attach socket %s", ctr.ID(), attachSock)
+ }
+
+ detachString := ctr.runtime.config.Engine.DetachKeys
+ if detachKeys != nil {
+ detachString = *detachKeys
+ }
+ detach, err := processDetachKeys(detachString)
+ if err != nil {
+ return err
+ }
+
+ attachStdout := true
+ attachStderr := true
+ attachStdin := true
+ if streams != nil {
+ attachStdout = streams.Stdout
+ attachStderr = streams.Stderr
+ attachStdin = streams.Stdin
+ }
+
+ logrus.Debugf("Going to hijack container %s attach connection", ctr.ID())
+
+ // Alright, let's hijack.
+ hijacker, ok := w.(http.Hijacker)
+ if !ok {
+ return fmt.Errorf("unable to hijack connection")
+ }
+
+ httpCon, httpBuf, err := hijacker.Hijack()
+ if err != nil {
+ return fmt.Errorf("hijacking connection: %w", err)
+ }
+
+ hijackDone <- true
+
+ writeHijackHeader(req, httpBuf)
+
+ // Force a flush after the header is written.
+ if err := httpBuf.Flush(); err != nil {
+ return fmt.Errorf("flushing HTTP hijack header: %w", err)
+ }
+
+ defer func() {
+ hijackWriteErrorAndClose(deferredErr, ctr.ID(), isTerminal, httpCon, httpBuf)
+ }()
+
+ logrus.Debugf("Hijack for container %s attach session done, ready to stream", ctr.ID())
+
+ // TODO: This is gross. Really, really gross.
+ // I want to say we should read all the logs into an array before
+ // calling this, in container_api.go, but that could take a lot of
+ // memory...
+ // On the whole, we need to figure out a better way of doing this,
+ // though.
+ logSize := 0
+ if streamLogs {
+ logrus.Debugf("Will stream logs for container %s attach session", ctr.ID())
+
+ // Get all logs for the container
+ logChan := make(chan *logs.LogLine)
+ logOpts := new(logs.LogOptions)
+ logOpts.Tail = -1
+ logOpts.WaitGroup = new(sync.WaitGroup)
+ errChan := make(chan error)
+ go func() {
+ var err error
+ // In non-terminal mode we need to prepend each log
+ // line with the stream header.
+ logrus.Debugf("Writing logs for container %s to HTTP attach", ctr.ID())
+ for logLine := range logChan {
+ if !isTerminal {
+ device := logLine.Device
+ var header []byte
+ headerLen := uint32(len(logLine.Msg))
+ logSize += len(logLine.Msg)
+ switch strings.ToLower(device) {
+ case "stdin":
+ header = makeHTTPAttachHeader(0, headerLen)
+ case "stdout":
+ header = makeHTTPAttachHeader(1, headerLen)
+ case "stderr":
+ header = makeHTTPAttachHeader(2, headerLen)
+ default:
+ logrus.Errorf("Unknown device for log line: %s", device)
+ header = makeHTTPAttachHeader(1, headerLen)
+ }
+ _, err = httpBuf.Write(header)
+ if err != nil {
+ break
+ }
+ }
+ _, err = httpBuf.Write([]byte(logLine.Msg))
+ if err != nil {
+ break
+ }
+ if !logLine.Partial() {
+ _, err = httpBuf.Write([]byte("\n"))
+ if err != nil {
+ break
+ }
+ }
+ err = httpBuf.Flush()
+ if err != nil {
+ break
+ }
+ }
+ errChan <- err
+ }()
+ if err := ctr.ReadLog(context.Background(), logOpts, logChan, 0); err != nil {
+ return err
+ }
+ go func() {
+ logOpts.WaitGroup.Wait()
+ close(logChan)
+ }()
+ logrus.Debugf("Done reading logs for container %s, %d bytes", ctr.ID(), logSize)
+ if err := <-errChan; err != nil {
+ return err
+ }
+ }
+ if !streamAttach {
+ logrus.Debugf("Done streaming logs for container %s attach, exiting as attach streaming not requested", ctr.ID())
+ return nil
+ }
+
+ logrus.Debugf("Forwarding attach output for container %s", ctr.ID())
+
+ stdoutChan := make(chan error)
+ stdinChan := make(chan error)
+
+ // Handle STDOUT/STDERR
+ go func() {
+ var err error
+ if isTerminal {
+ // Hack: return immediately if attachStdout is not set, to
+ // emulate Docker.
+ // Basically, when a terminal is set, STDERR goes nowhere.
+ // Everything goes over STDOUT.
+ // Therefore, if not attaching STDOUT, we'll never copy
+ // anything from here.
+ logrus.Debugf("Performing terminal HTTP attach for container %s", ctr.ID())
+ if attachStdout {
+ err = httpAttachTerminalCopy(conn, httpBuf, ctr.ID())
+ }
+ } else {
+ logrus.Debugf("Performing non-terminal HTTP attach for container %s", ctr.ID())
+ err = httpAttachNonTerminalCopy(conn, httpBuf, ctr.ID(), attachStdin, attachStdout, attachStderr)
+ }
+ stdoutChan <- err
+ logrus.Debugf("STDOUT/ERR copy completed")
+ }()
+ // Next, STDIN. Avoid entirely if attachStdin unset.
+ if attachStdin {
+ go func() {
+ _, err := cutil.CopyDetachable(conn, httpBuf, detach)
+ logrus.Debugf("STDIN copy completed")
+ stdinChan <- err
+ }()
+ }
+
+ for {
+ select {
+ case err := <-stdoutChan:
+ if err != nil {
+ return err
+ }
+
+ return nil
+ case err := <-stdinChan:
+ if err != nil {
+ return err
+ }
+ // copy stdin is done, close it
+ if connErr := socketCloseWrite(conn); connErr != nil {
+ logrus.Errorf("Unable to close conn: %v", connErr)
+ }
+ case <-cancel:
+ return nil
+ }
+ }
+}
+
+// isRetryable returns whether the error was caused by an interrupted syscall or
+// by an operation on a non-blocking file descriptor that wasn't ready for completion.
+func isRetryable(err error) bool {
+ var errno syscall.Errno
+ if errors.As(err, &errno) {
+ return errno == syscall.EINTR || errno == syscall.EAGAIN
+ }
+ return false
+}
+
+// openControlFile opens the terminal control file.
+func openControlFile(ctr *Container, parentDir string) (*os.File, error) {
+ controlPath := filepath.Join(parentDir, "ctl")
+ for i := 0; i < 600; i++ {
+ controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY|unix.O_NONBLOCK, 0)
+ if err == nil {
+ return controlFile, nil
+ }
+ if !isRetryable(err) {
+ return nil, fmt.Errorf("could not open ctl file for terminal resize for container %s: %w", ctr.ID(), err)
+ }
+ time.Sleep(time.Second / 10)
+ }
+ return nil, fmt.Errorf("timeout waiting for %q", controlPath)
+}
+
+// AttachResize resizes the terminal used by the given container.
+func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize resize.TerminalSize) error {
+ controlFile, err := openControlFile(ctr, ctr.bundlePath())
+ if err != nil {
+ return err
+ }
+ defer controlFile.Close()
+
+ logrus.Debugf("Received a resize event for container %s: %+v", ctr.ID(), newSize)
+ if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil {
+ return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err)
+ }
+
+ return nil
+}
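+
+// The ctl file speaks a small text protocol: each line is
+// "<msg> <height> <width>\n", where message type 1 requests a terminal
+// resize. A minimal sketch of issuing one by hand (path hypothetical):
+//
+//	ctl, err := os.OpenFile("/path/to/bundle/ctl", unix.O_WRONLY, 0)
+//	if err == nil {
+//		defer ctl.Close()
+//		fmt.Fprintf(ctl, "%d %d %d\n", 1, 24, 80) // resize to 24 rows x 80 cols
+//	}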
+
+// CheckpointContainer checkpoints the given container.
+func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
+ // imagePath is used by CRIU to store the actual checkpoint files
+ imagePath := ctr.CheckpointPath()
+ if options.PreCheckPoint {
+ imagePath = ctr.PreCheckPointPath()
+ }
+ // workPath will be used to store dump.log and stats-dump
+ workPath := ctr.bundlePath()
+ logrus.Debugf("Writing checkpoint to %s", imagePath)
+ logrus.Debugf("Writing checkpoint logs to %s", workPath)
+ logrus.Debugf("Pre-dump the container %t", options.PreCheckPoint)
+ args := []string{}
+ args = append(args, r.runtimeFlags...)
+ args = append(args, "checkpoint")
+ args = append(args, "--image-path")
+ args = append(args, imagePath)
+ args = append(args, "--work-path")
+ args = append(args, workPath)
+ if options.KeepRunning {
+ args = append(args, "--leave-running")
+ }
+ if options.TCPEstablished {
+ args = append(args, "--tcp-established")
+ }
+ if options.FileLocks {
+ args = append(args, "--file-locks")
+ }
+ if options.PreCheckPoint {
+ args = append(args, "--pre-dump")
+ }
+ if !options.PreCheckPoint && options.WithPrevious {
+ args = append(
+ args,
+ "--parent-path",
+ filepath.Join("..", preCheckpointDir),
+ )
+ }
+
+ args = append(args, ctr.ID())
+ logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " "))
+
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return 0, err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ if path, ok := os.LookupEnv("PATH"); ok {
+ env = append(env, fmt.Sprintf("PATH=%s", path))
+ }
+
+ var runtimeCheckpointStarted time.Time
+ err = r.withContainerSocketLabel(ctr, func() error {
+ runtimeCheckpointStarted = time.Now()
+ return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+ })
+
+ runtimeCheckpointDuration := func() int64 {
+ if options.PrintStats {
+ return time.Since(runtimeCheckpointStarted).Microseconds()
+ }
+ return 0
+ }()
+
+ return runtimeCheckpointDuration, err
+}
+
+func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
+ if ctr.state.ConmonPID == 0 {
+ // If the container is running or paused, assume Conmon is
+ // running. We didn't record Conmon PID on some old versions, so
+ // that is likely what's going on...
+ // Unusual enough that we should print a warning message though.
+ if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
+ logrus.Warnf("Conmon PID is not set, but container is running!")
+ return true, nil
+ }
+ // Container's not running, so conmon PID being unset is
+ // expected. Conmon is not running.
+ return false, nil
+ }
+
+ // We have a conmon PID. Ping it with signal 0.
+ if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil {
+ if err == unix.ESRCH {
+ return false, nil
+ }
+ return false, fmt.Errorf("pinging container %s conmon with signal 0: %w", ctr.ID(), err)
+ }
+ return true, nil
+}
+
+// SupportsCheckpoint checks if the OCI runtime supports checkpointing
+// containers.
+func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
+ return crutils.CRRuntimeSupportsCheckpointRestore(r.path)
+}
+
+// SupportsJSONErrors checks if the OCI runtime supports JSON-formatted error
+// messages.
+func (r *ConmonOCIRuntime) SupportsJSONErrors() bool {
+ return r.supportsJSON
+}
+
+// SupportsNoCgroups checks if the OCI runtime supports running containers
+// without cgroups (the --cgroup-manager=disabled flag).
+func (r *ConmonOCIRuntime) SupportsNoCgroups() bool {
+ return r.supportsNoCgroups
+}
+
+// SupportsKVM checks if the OCI runtime supports running containers
+// without KVM separation
+func (r *ConmonOCIRuntime) SupportsKVM() bool {
+ return r.supportsKVM
+}
+
+// AttachSocketPath is the path to a single container's attach socket.
+func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) {
+ if ctr == nil {
+ return "", fmt.Errorf("must provide a valid container to get attach socket path: %w", define.ErrInvalidArg)
+ }
+
+ return filepath.Join(ctr.bundlePath(), "attach"), nil
+}
+
+// ExitFilePath is the path to a container's exit file.
+func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
+ if ctr == nil {
+ return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg)
+ }
+ return filepath.Join(r.exitsDir, ctr.ID()), nil
+}
+
+// RuntimeInfo provides information on the runtime.
+func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
+ runtimePackage := packageVersion(r.path)
+ conmonPackage := packageVersion(r.conmonPath)
+ runtimeVersion, err := r.getOCIRuntimeVersion()
+ if err != nil {
+ return nil, nil, fmt.Errorf("getting version of OCI runtime %s: %w", r.name, err)
+ }
+ conmonVersion, err := r.getConmonVersion()
+ if err != nil {
+ return nil, nil, fmt.Errorf("getting conmon version: %w", err)
+ }
+
+ conmon := define.ConmonInfo{
+ Package: conmonPackage,
+ Path: r.conmonPath,
+ Version: conmonVersion,
+ }
+ ocirt := define.OCIRuntimeInfo{
+ Name: r.name,
+ Path: r.path,
+ Package: runtimePackage,
+ Version: runtimeVersion,
+ }
+ return &conmon, &ocirt, nil
+}
+
+// makeAccessible changes the permissions of the path and each parent directory to include --x--x--x
+func makeAccessible(path string, uid, gid int) error {
+ for ; path != "/"; path = filepath.Dir(path) {
+ st, err := os.Stat(path)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil
+ }
+ return err
+ }
+ if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
+ continue
+ }
+ if st.Mode()&0111 != 0111 {
+ if err := os.Chmod(path, st.Mode()|0111); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+// Wait for a container which has been sent a signal to stop
+func waitContainerStop(ctr *Container, timeout time.Duration) error {
+ return waitPidStop(ctr.state.PID, timeout)
+}
+
+// Wait for a given PID to stop
+func waitPidStop(pid int, timeout time.Duration) error {
+ done := make(chan struct{})
+ chControl := make(chan struct{})
+ go func() {
+ for {
+ select {
+ case <-chControl:
+ return
+ default:
+ if err := unix.Kill(pid, 0); err != nil {
+ if err == unix.ESRCH {
+ close(done)
+ return
+ }
+ logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+ }
+ }()
+ select {
+ case <-done:
+ return nil
+ case <-time.After(timeout):
+ close(chControl)
+ return fmt.Errorf("given PIDs did not die within timeout")
+ }
+}
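+
+// Note that unix.Kill with signal 0 delivers no signal at all: the kernel
+// only performs its existence and permission checks, so ESRCH is the
+// reliable "this PID is gone" probe used throughout this file.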
+
+func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) {
+ logTag := ctr.LogTag()
+ if logTag == "" {
+ return "", nil
+ }
+ data, err := ctr.inspectLocked(false)
+ if err != nil {
+ // FIXME: this error should probably be returned
+ return "", nil //nolint: nilerr
+ }
+ tmpl, err := template.New("container").Parse(logTag)
+ if err != nil {
+ return "", fmt.Errorf("template parsing error %s: %w", logTag, err)
+ }
+ var b bytes.Buffer
+ err = tmpl.Execute(&b, data)
+ if err != nil {
+ return "", err
+ }
+ return b.String(), nil
+}
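+
+// The log tag is a Go text/template evaluated against the container's
+// inspect data, so a tag such as "{{.Name}}" or "{{.ID}}" expands to the
+// container name or full ID. A minimal sketch of the same mechanism:
+//
+//	tmpl, _ := template.New("container").Parse("{{.Name}}")
+//	var b bytes.Buffer
+//	_ = tmpl.Execute(&b, data) // data is the inspect result
+//	tag := b.String()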
+
+// createOCIContainer generates this container's main conmon instance and prepares it for starting
+func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ var stderrBuf bytes.Buffer
+
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return 0, err
+ }
+
+ parentSyncPipe, childSyncPipe, err := newPipe()
+ if err != nil {
+ return 0, fmt.Errorf("creating socket pair: %w", err)
+ }
+ defer errorhandling.CloseQuiet(parentSyncPipe)
+
+ childStartPipe, parentStartPipe, err := newPipe()
+ if err != nil {
+ return 0, fmt.Errorf("creating socket pair for start pipe: %w", err)
+ }
+
+ defer errorhandling.CloseQuiet(parentStartPipe)
+
+ var ociLog string
+ if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
+ ociLog = filepath.Join(ctr.state.RunDir, "oci-log")
+ }
+
+ logTag, err := r.getLogTag(ctr)
+ if err != nil {
+ return 0, err
+ }
+
+ if ctr.config.CgroupsMode == cgroupSplit {
+ if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil {
+ return 0, err
+ }
+ }
+
+ pidfile := ctr.config.PidFile
+ if pidfile == "" {
+ pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
+ }
+
+ args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
+
+ if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
+ args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
+ }
+
+ if ctr.config.Spec.Process.Terminal {
+ args = append(args, "-t")
+ } else if ctr.config.Stdin {
+ args = append(args, "-i")
+ }
+
+ if ctr.config.Timeout > 0 {
+ args = append(args, fmt.Sprintf("--timeout=%d", ctr.config.Timeout))
+ }
+
+ if !r.enableKeyring {
+ args = append(args, "--no-new-keyring")
+ }
+ if ctr.config.ConmonPidFile != "" {
+ args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile)
+ }
+
+ if r.noPivot {
+ args = append(args, "--no-pivot")
+ }
+
+ exitCommand, err := specgenutil.CreateExitCommandArgs(ctr.runtime.storageConfig, ctr.runtime.config, logrus.IsLevelEnabled(logrus.DebugLevel), ctr.AutoRemove(), false)
+ if err != nil {
+ return 0, err
+ }
+ exitCommand = append(exitCommand, ctr.config.ID)
+
+ args = append(args, "--exit-command", exitCommand[0])
+ for _, arg := range exitCommand[1:] {
+ args = append(args, []string{"--exit-command-arg", arg}...)
+ }
+
+ // Pass down the LISTEN_* environment (see #10443).
+ preserveFDs := ctr.config.PreserveFDs
+ if val := os.Getenv("LISTEN_FDS"); val != "" {
+ if ctr.config.PreserveFDs > 0 {
+ logrus.Warnf("Ignoring LISTEN_FDS to preserve custom user-specified FDs")
+ } else {
+ fds, err := strconv.Atoi(val)
+ if err != nil {
+ return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err)
+ }
+ preserveFDs = uint(fds)
+ }
+ }
+
+ if preserveFDs > 0 {
+ args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", preserveFDs))...)
+ }
+
+ if restoreOptions != nil {
+ args = append(args, "--restore", ctr.CheckpointPath())
+ if restoreOptions.TCPEstablished {
+ args = append(args, "--runtime-opt", "--tcp-established")
+ }
+ if restoreOptions.FileLocks {
+ args = append(args, "--runtime-opt", "--file-locks")
+ }
+ if restoreOptions.Pod != "" {
+ mountLabel := ctr.config.MountLabel
+ processLabel := ctr.config.ProcessLabel
+ if mountLabel != "" {
+ args = append(
+ args,
+ "--runtime-opt",
+ fmt.Sprintf(
+ "--lsm-mount-context=%s",
+ mountLabel,
+ ),
+ )
+ }
+ if processLabel != "" {
+ args = append(
+ args,
+ "--runtime-opt",
+ fmt.Sprintf(
+ "--lsm-profile=selinux:%s",
+ processLabel,
+ ),
+ )
+ }
+ }
+ }
+
+ logrus.WithFields(logrus.Fields{
+ "args": args,
+ }).Debugf("running conmon: %s", r.conmonPath)
+
+ cmd := exec.Command(r.conmonPath, args...)
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Setpgid: true,
+ }
+ // TODO this is probably a really bad idea for some uses
+ // Make this configurable
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if ctr.config.Spec.Process.Terminal {
+ cmd.Stderr = &stderrBuf
+ }
+
+ // 0, 1 and 2 are stdin, stdout and stderr
+ conmonEnv := r.configureConmonEnv(runtimeDir)
+
+ var filesToClose []*os.File
+ if preserveFDs > 0 {
+ for fd := 3; fd < int(3+preserveFDs); fd++ {
+ f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
+ filesToClose = append(filesToClose, f)
+ cmd.ExtraFiles = append(cmd.ExtraFiles, f)
+ }
+ }
+
+ cmd.Env = r.conmonEnv
+ // we don't want to step on the fds the user asked us to preserve
+ // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
+ cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4))
+ cmd.Env = append(cmd.Env, conmonEnv...)
+ cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe)
+
+ if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() {
+ ports, err := bindPorts(ctr.convertPortMappings())
+ if err != nil {
+ return 0, err
+ }
+ filesToClose = append(filesToClose, ports...)
+
+ // Leak the ports we bound into the conmon process. These fds won't be used
+ // by the container, and conmon will keep the ports busy so that another
+ // process cannot use them.
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ports...)
+ }
+
+ if ctr.config.NetMode.IsSlirp4netns() || rootless.IsRootless() {
+ if ctr.config.PostConfigureNetNS {
+ havePortMapping := len(ctr.config.PortMappings) > 0
+ if havePortMapping {
+ ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
+ if err != nil {
+ return 0, fmt.Errorf("failed to create rootless port sync pipe: %w", err)
+ }
+ }
+ ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
+ if err != nil {
+ return 0, fmt.Errorf("failed to create rootless network sync pipe: %w", err)
+ }
+ } else {
+ if ctr.rootlessSlirpSyncR != nil {
+ defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
+ }
+ if ctr.rootlessSlirpSyncW != nil {
+ defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
+ }
+ }
+ // Leak one end in conmon, the other one will be leaked into slirp4netns
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
+
+ if ctr.rootlessPortSyncW != nil {
+ defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW)
+ // Leak one end in conmon, the other one will be leaked into rootlessport
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW)
+ }
+ }
+ var runtimeRestoreStarted time.Time
+ if restoreOptions != nil {
+ runtimeRestoreStarted = time.Now()
+ }
+ err = startCommand(cmd, ctr)
+
+ // regardless of whether we errored or not, we no longer need the child pipes
+ childSyncPipe.Close()
+ childStartPipe.Close()
+ if err != nil {
+ return 0, err
+ }
+ if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil {
+ return 0, err
+ }
+ /* Wait for initial setup and fork, and reap child */
+ err = cmd.Wait()
+ if err != nil {
+ return 0, err
+ }
+
+ pid, err := readConmonPipeData(r.name, parentSyncPipe, ociLog)
+ if err != nil {
+ if err2 := r.DeleteContainer(ctr); err2 != nil {
+ logrus.Errorf("Removing container %s from runtime after creation failed", ctr.ID())
+ }
+ return 0, err
+ }
+ ctr.state.PID = pid
+
+ conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile)
+ if err != nil {
+ logrus.Warnf("Error reading conmon pid file for container %s: %v", ctr.ID(), err)
+ } else if conmonPID > 0 {
+ // conmon not having a pid file is a valid state, so don't set it if we don't have it
+ logrus.Infof("Got Conmon PID as %d", conmonPID)
+ ctr.state.ConmonPID = conmonPID
+ }
+
+ runtimeRestoreDuration := func() int64 {
+ if restoreOptions != nil && restoreOptions.PrintStats {
+ return time.Since(runtimeRestoreStarted).Microseconds()
+ }
+ return 0
+ }()
+
+ // These fds were passed down to the runtime. Close them
+ // so we do not interfere.
+ for _, f := range filesToClose {
+ errorhandling.CloseQuiet(f)
+ }
+
+ return runtimeRestoreDuration, nil
+}
+
+// configureConmonEnv gets the environment values to add to conmon's exec struct
+// TODO this may want to be less hardcoded/more configurable in the future
+func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) []string {
+ var env []string
+ for _, e := range os.Environ() {
+ if strings.HasPrefix(e, "LC_") {
+ env = append(env, e)
+ }
+ }
+ if path, ok := os.LookupEnv("PATH"); ok {
+ env = append(env, fmt.Sprintf("PATH=%s", path))
+ }
+ if conf, ok := os.LookupEnv("CONTAINERS_CONF"); ok {
+ env = append(env, fmt.Sprintf("CONTAINERS_CONF=%s", conf))
+ }
+ if conf, ok := os.LookupEnv("CONTAINERS_HELPER_BINARY_DIR"); ok {
+ env = append(env, fmt.Sprintf("CONTAINERS_HELPER_BINARY_DIR=%s", conf))
+ }
+ env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
+ env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED")))
+ env = append(env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID")))
+ home := homedir.Get()
+ if home != "" {
+ env = append(env, fmt.Sprintf("HOME=%s", home))
+ }
+
+ return env
+}
+
+// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
+func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
+ // set the conmon API version to be able to use the correct sync struct keys
+ args := []string{
+ "--api-version", "1",
+ "-c", ctr.ID(),
+ "-u", cuuid,
+ "-r", r.path,
+ "-b", bundlePath,
+ "-p", pidPath,
+ "-n", ctr.Name(),
+ "--exit-dir", exitDir,
+ "--full-attach",
+ }
+ if len(r.runtimeFlags) > 0 {
+ rFlags := []string{}
+ for _, arg := range r.runtimeFlags {
+ rFlags = append(rFlags, "--runtime-arg", arg)
+ }
+ args = append(args, rFlags...)
+ }
+
+ if ctr.CgroupManager() == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
+ args = append(args, "-s")
+ }
+
+ var logDriverArg string
+ switch logDriver {
+ case define.JournaldLogging:
+ logDriverArg = define.JournaldLogging
+ case define.NoLogging:
+ logDriverArg = define.NoLogging
+ case define.PassthroughLogging:
+ logDriverArg = define.PassthroughLogging
+ //lint:ignore ST1015 the default case has to be here
+ default: //nolint:stylecheck,gocritic
+ // No case here should happen except JSONLogging, but keep this here in case the options are extended
+ logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver())
+ fallthrough
+ case "":
+ // to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod
+ // since the former case is obscure, and the latter case isn't an error, let's silently fallthrough
+ fallthrough
+ case define.JSONLogging:
+ fallthrough
+ case define.KubernetesLogging:
+ logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
+ }
+
+ args = append(args, "-l", logDriverArg)
+ logLevel := logrus.GetLevel()
+ args = append(args, "--log-level", logLevel.String())
+
+ if logLevel == logrus.DebugLevel {
+ logrus.Debugf("%s messages will be logged to syslog", r.conmonPath)
+ args = append(args, "--syslog")
+ }
+
+ size := r.logSizeMax
+ if ctr.config.LogSize > 0 {
+ size = ctr.config.LogSize
+ }
+ if size > 0 {
+ args = append(args, "--log-size-max", fmt.Sprintf("%v", size))
+ }
+
+ if ociLogPath != "" {
+ args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath))
+ }
+ if logTag != "" {
+ args = append(args, "--log-tag", logTag)
+ }
+ if ctr.config.NoCgroups {
+ logrus.Debugf("Running with no Cgroups")
+ args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
+ }
+ return args
+}
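+
+// Put together, a representative conmon invocation assembled from the
+// arguments above looks roughly like this (paths hypothetical, abridged):
+//
+//	conmon --api-version 1 -c <ctrID> -u <cuuid> -r /usr/bin/crun \
+//	    -b <bundleDir> -p <bundleDir>/pidfile -n <name> \
+//	    --exit-dir <tmpDir>/exits --full-attach \
+//	    -l k8s-file:<logPath> --log-level info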
+
+func startCommand(cmd *exec.Cmd, ctr *Container) error {
+ // Make sure to unset the NOTIFY_SOCKET and reset it afterwards if needed.
+ switch ctr.config.SdNotifyMode {
+ case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
+ if prev := os.Getenv("NOTIFY_SOCKET"); prev != "" {
+ if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
+ logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
+ }
+ defer func() {
+ if err := os.Setenv("NOTIFY_SOCKET", prev); err != nil {
+ logrus.Errorf("Resetting NOTIFY_SOCKET=%s", prev)
+ }
+ }()
+ }
+ }
+
+ return cmd.Start()
+}
+
+// newPipe creates a unix socket pair for communication.
+// Returns two files - first is parent, second is child.
+func newPipe() (*os.File, *os.File, error) {
+ fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
+ if err != nil {
+ return nil, nil, err
+ }
+ return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
+}
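+
+// Both pipe pairs are handed to conmon as inherited file descriptors:
+// stdio occupies fds 0-2, any user-preserved fds come next, and the
+// _OCI_SYNCPIPE/_OCI_STARTPIPE variables set in createOCIContainer above
+// tell conmon which fd numbers to read. A sketch of the parent side,
+// assuming no preserved fds:
+//
+//	parentSync, childSync, _ := newPipe()
+//	cmd.ExtraFiles = append(cmd.ExtraFiles, childSync) // becomes fd 3 in conmon
+//	cmd.Env = append(cmd.Env, "_OCI_SYNCPIPE=3")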
+
+// readConmonPidFile attempts to read conmon's pid from its pid file
+func readConmonPidFile(pidFile string) (int, error) {
+ // Try to read the Conmon pid from its pid file, if one was provided.
+ if pidFile != "" {
+ contents, err := ioutil.ReadFile(pidFile)
+ if err != nil {
+ return -1, err
+ }
+ // Convert it to an int
+ conmonPID, err := strconv.Atoi(string(contents))
+ if err != nil {
+ return -1, err
+ }
+ return conmonPID, nil
+ }
+ return 0, nil
+}
+
+// readConmonPipeData attempts to read a syncInfo struct from the pipe
+func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int, error) {
+ // syncInfo is used to return data from monitor process to daemon
+ type syncInfo struct {
+ Data int `json:"data"`
+ Message string `json:"message,omitempty"`
+ }
+
+ // Wait to get container pid from conmon
+ type syncStruct struct {
+ si *syncInfo
+ err error
+ }
+ ch := make(chan syncStruct)
+ go func() {
+ var si *syncInfo
+ rdr := bufio.NewReader(pipe)
+ b, err := rdr.ReadBytes('\n')
+ // Ignore EOF here: an error is returned even when data was read.
+ // If it is not valid JSON, unmarshal will fail below.
+ if err != nil && !errors.Is(err, io.EOF) {
+ ch <- syncStruct{err: err}
+ }
+ if err := json.Unmarshal(b, &si); err != nil {
+ ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: %w", string(b), err)}
+ return
+ }
+ ch <- syncStruct{si: si}
+ }()
+
+ data := -1 //nolint: wastedassign
+ select {
+ case ss := <-ch:
+ if ss.err != nil {
+ if ociLog != "" {
+ ociLogData, err := ioutil.ReadFile(ociLog)
+ if err == nil {
+ var ociErr ociError
+ if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
+ return -1, getOCIRuntimeError(runtimeName, ociErr.Msg)
+ }
+ }
+ }
+ return -1, fmt.Errorf("container create failed (no logs from conmon): %w", ss.err)
+ }
+ logrus.Debugf("Received: %d", ss.si.Data)
+ if ss.si.Data < 0 {
+ if ociLog != "" {
+ ociLogData, err := ioutil.ReadFile(ociLog)
+ if err == nil {
+ var ociErr ociError
+ if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
+ return ss.si.Data, getOCIRuntimeError(runtimeName, ociErr.Msg)
+ }
+ }
+ }
+ // If we failed to parse the JSON errors, then print the output as it is
+ if ss.si.Message != "" {
+ return ss.si.Data, getOCIRuntimeError(runtimeName, ss.si.Message)
+ }
+ return ss.si.Data, fmt.Errorf("container create failed: %w", define.ErrInternal)
+ }
+ data = ss.si.Data
+ case <-time.After(define.ContainerCreateTimeout):
+ return -1, fmt.Errorf("container creation timeout: %w", define.ErrInternal)
+ }
+ return data, nil
+}
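+
+// The payloads read here are single-line JSON objects that conmon writes to
+// the sync pipe. On success the container PID arrives as, for example:
+//
+//	{"data": 12345}
+//
+// while a failure carries a negative value plus a message (message text
+// hypothetical):
+//
+//	{"data": -1, "message": "OCI runtime error: ..."}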
+
+// writeConmonPipeData writes nonce data to a pipe
+func writeConmonPipeData(pipe *os.File) error {
+ someData := []byte{0}
+ _, err := pipe.Write(someData)
+ return err
+}
+
+// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon
+func formatRuntimeOpts(opts ...string) []string {
+ args := make([]string, 0, len(opts)*2)
+ for _, o := range opts {
+ args = append(args, "--runtime-opt", o)
+ }
+ return args
+}
+
+// getConmonVersion returns a string representation of the conmon version.
+func (r *ConmonOCIRuntime) getConmonVersion() (string, error) {
+ output, err := utils.ExecCmd(r.conmonPath, "--version")
+ if err != nil {
+ return "", err
+ }
+ return strings.TrimSuffix(strings.Replace(output, "\n", ", ", 1), "\n"), nil
+}
+
+// getOCIRuntimeVersion returns a string representation of the OCI runtime's
+// version.
+func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) {
+ output, err := utils.ExecCmd(r.path, "--version")
+ if err != nil {
+ return "", err
+ }
+ return strings.TrimSuffix(output, "\n"), nil
+}
+
+// Copy data from container to HTTP connection, for terminal attach.
+// Container is the container's attach socket connection, http is a buffer for
+// the HTTP connection. cid is the ID of the container the attach session is
+// running for (used solely for error messages).
+func httpAttachTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string) error {
+ buf := make([]byte, bufferSize)
+ for {
+ numR, err := container.Read(buf)
+ logrus.Debugf("Read fd(%d) %d/%d bytes for container %s", int(buf[0]), numR, len(buf), cid)
+
+ if numR > 0 {
+ switch buf[0] {
+ case AttachPipeStdout:
+ // Do nothing
+ default:
+ logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
+ continue
+ }
+
+ numW, err2 := http.Write(buf[1:numR])
+ if err2 != nil {
+ if err != nil {
+ logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
+ }
+ return err2
+ } else if numW+1 != numR {
+ return io.ErrShortWrite
+ }
+ // We need to force the buffer to write immediately, so
+ // there isn't a delay on the terminal side.
+ if err2 := http.Flush(); err2 != nil {
+ if err != nil {
+ logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
+ }
+ return err2
+ }
+ }
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+ return err
+ }
+ }
+}
+
+// Copy data from a container to an HTTP connection, for non-terminal attach.
+// Appends a header to multiplex input.
+func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string, stdin, stdout, stderr bool) error {
+ buf := make([]byte, bufferSize)
+ for {
+ numR, err := container.Read(buf)
+ if numR > 0 {
+ var headerBuf []byte
+
+ // Subtract 1 because we strip the first byte (used for
+ // multiplexing by Conmon).
+ headerLen := uint32(numR - 1)
+ // Practically speaking, we could make this buf[0] - 1,
+ // but we need to validate it anyway.
+ switch buf[0] {
+ case AttachPipeStdin:
+ headerBuf = makeHTTPAttachHeader(0, headerLen)
+ if !stdin {
+ continue
+ }
+ case AttachPipeStdout:
+ if !stdout {
+ continue
+ }
+ headerBuf = makeHTTPAttachHeader(1, headerLen)
+ case AttachPipeStderr:
+ if !stderr {
+ continue
+ }
+ headerBuf = makeHTTPAttachHeader(2, headerLen)
+ default:
+ logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
+ continue
+ }
+
+ numH, err2 := http.Write(headerBuf)
+ if err2 != nil {
+ if err != nil {
+ logrus.Errorf("Reading container %s standard streams: %v", cid, err)
+ }
+
+ return err2
+ }
+ // Hardcoding header length is pretty gross, but
+ // fast. Should be safe, as this is a fixed part
+ // of the protocol.
+ if numH != 8 {
+ if err != nil {
+ logrus.Errorf("Reading container %s standard streams: %v", cid, err)
+ }
+
+ return io.ErrShortWrite
+ }
+
+ numW, err2 := http.Write(buf[1:numR])
+ if err2 != nil {
+ if err != nil {
+ logrus.Errorf("Reading container %s standard streams: %v", cid, err)
+ }
+
+ return err2
+ } else if numW+1 != numR {
+ if err != nil {
+ logrus.Errorf("Reading container %s standard streams: %v", cid, err)
+ }
+
+ return io.ErrShortWrite
+ }
+ // We need to force the buffer to write immediately, so
+ // there isn't a delay on the terminal side.
+ if err2 := http.Flush(); err2 != nil {
+ if err != nil {
+ logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
+ }
+ return err2
+ }
+ }
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+
+ return err
+ }
+ }
+}
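+
+// The 8-byte header written above follows the Docker attach framing: byte 0
+// carries the stream number (0 stdin, 1 stdout, 2 stderr), bytes 1-3 are
+// zero, and bytes 4-7 hold the payload length as a big-endian uint32. A
+// minimal sketch of building one, mirroring makeHTTPAttachHeader (defined
+// elsewhere in libpod):
+//
+//	header := make([]byte, 8)
+//	header[0] = 1 // stdout
+//	binary.BigEndian.PutUint32(header[4:8], uint32(len(payload)))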
diff --git a/libpod/oci_conmon_exec_common.go b/libpod/oci_conmon_exec_common.go
new file mode 100644
index 000000000..e5080942b
--- /dev/null
+++ b/libpod/oci_conmon_exec_common.go
@@ -0,0 +1,771 @@
+package libpod
+
+import (
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+ cutil "github.com/containers/common/pkg/util"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/errorhandling"
+ "github.com/containers/podman/v4/pkg/lookup"
+ "github.com/containers/podman/v4/pkg/util"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+// ExecContainer executes a command in a running container
+func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *resize.TerminalSize) (int, chan error, error) {
+ if options == nil {
+ return -1, nil, fmt.Errorf("must provide an ExecOptions struct to ExecContainer: %w", define.ErrInvalidArg)
+ }
+ if len(options.Cmd) == 0 {
+ return -1, nil, fmt.Errorf("must provide a command to execute: %w", define.ErrInvalidArg)
+ }
+
+ if sessionID == "" {
+ return -1, nil, fmt.Errorf("must provide a session ID for exec: %w", define.ErrEmptyID)
+ }
+
+ // TODO: Should we default this to false?
+ // Or maybe make streams mandatory?
+ attachStdin := true
+ if streams != nil {
+ attachStdin = streams.AttachInput
+ }
+
+ var ociLog string
+ if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
+ ociLog = c.execOCILog(sessionID)
+ }
+
+ execCmd, pipes, err := r.startExec(c, sessionID, options, attachStdin, ociLog)
+ if err != nil {
+ return -1, nil, err
+ }
+
+ // Only close sync pipe. Start and attach are consumed in the attach
+ // goroutine.
+ defer func() {
+ if pipes.syncPipe != nil && !pipes.syncClosed {
+ errorhandling.CloseQuiet(pipes.syncPipe)
+ pipes.syncClosed = true
+ }
+ }()
+
+ // TODO Only create if !detach
+ // Attach to the container before starting it
+ attachChan := make(chan error)
+ go func() {
+ // attachToExec is responsible for closing pipes
+ attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe, newSize)
+ close(attachChan)
+ }()
+
+ if err := execCmd.Wait(); err != nil {
+ return -1, nil, fmt.Errorf("cannot run conmon: %w", err)
+ }
+
+ pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog)
+
+ return pid, attachChan, err
+}
+
+// ExecContainerHTTP executes a new command in an existing container and
+// forwards its standard streams over an attach
+func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter,
+ streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *resize.TerminalSize) (int, chan error, error) {
+ if streams != nil {
+ if !streams.Stdin && !streams.Stdout && !streams.Stderr {
+ return -1, nil, fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
+ }
+ }
+
+ if options == nil {
+ return -1, nil, fmt.Errorf("must provide exec options to ExecContainerHTTP: %w", define.ErrInvalidArg)
+ }
+
+ detachString := config.DefaultDetachKeys
+ if options.DetachKeys != nil {
+ detachString = *options.DetachKeys
+ }
+ detachKeys, err := processDetachKeys(detachString)
+ if err != nil {
+ return -1, nil, err
+ }
+
+ // TODO: Should we default this to false?
+ // Or maybe make streams mandatory?
+ attachStdin := true
+ if streams != nil {
+ attachStdin = streams.Stdin
+ }
+
+ var ociLog string
+ if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
+ ociLog = ctr.execOCILog(sessionID)
+ }
+
+ execCmd, pipes, err := r.startExec(ctr, sessionID, options, attachStdin, ociLog)
+ if err != nil {
+ return -1, nil, err
+ }
+
+ // Only close sync pipe. Start and attach are consumed in the attach
+ // goroutine.
+ defer func() {
+ if pipes.syncPipe != nil && !pipes.syncClosed {
+ errorhandling.CloseQuiet(pipes.syncPipe)
+ pipes.syncClosed = true
+ }
+ }()
+
+ attachChan := make(chan error)
+ conmonPipeDataChan := make(chan conmonPipeData)
+ go func() {
+ // attachToExec is responsible for closing pipes
+ attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog, newSize, r.name)
+ close(attachChan)
+ }()
+
+ // NOTE: the channel is needed to communicate conmon's data. In case
+ // of an error, the error will be written to the hijacked HTTP
+ // connection so that remote clients will receive it.
+ pipeData := <-conmonPipeDataChan
+
+ return pipeData.pid, attachChan, pipeData.err
+}
+
+// conmonPipeData contains the data when reading from conmon's pipe.
+type conmonPipeData struct {
+ pid int
+ err error
+}
+
+// ExecContainerDetached executes a command in a running container, but does
+// not attach to it.
+func (r *ConmonOCIRuntime) ExecContainerDetached(ctr *Container, sessionID string, options *ExecOptions, stdin bool) (int, error) {
+ if options == nil {
+ return -1, fmt.Errorf("must provide exec options to ExecContainerHTTP: %w", define.ErrInvalidArg)
+ }
+
+ var ociLog string
+ if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
+ ociLog = ctr.execOCILog(sessionID)
+ }
+
+ execCmd, pipes, err := r.startExec(ctr, sessionID, options, stdin, ociLog)
+ if err != nil {
+ return -1, err
+ }
+
+ defer func() {
+ pipes.cleanup()
+ }()
+
+ // Wait for Conmon to tell us we're ready to attach.
+ // We aren't actually *going* to attach, but this means that we're good
+ // to proceed.
+ if _, err := readConmonPipeData(r.name, pipes.attachPipe, ""); err != nil {
+ return -1, err
+ }
+
+ // Start the exec session
+ if err := writeConmonPipeData(pipes.startPipe); err != nil {
+ return -1, err
+ }
+
+ // Wait for conmon to exit successfully; if it failed, bail out.
+ if err := execCmd.Wait(); err != nil {
+ return -1, fmt.Errorf("cannot run conmon: %w", err)
+ }
+
+ pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog)
+
+ return pid, err
+}
+
+// ExecAttachResize resizes the TTY of the given exec session.
+func (r *ConmonOCIRuntime) ExecAttachResize(ctr *Container, sessionID string, newSize resize.TerminalSize) error {
+ controlFile, err := openControlFile(ctr, ctr.execBundlePath(sessionID))
+ if err != nil {
+ return err
+ }
+ defer controlFile.Close()
+
+ if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil {
+ return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err)
+ }
+
+ return nil
+}
+
+// ExecStopContainer stops a given exec session in a running container.
+func (r *ConmonOCIRuntime) ExecStopContainer(ctr *Container, sessionID string, timeout uint) error {
+ pid, err := ctr.getExecSessionPID(sessionID)
+ if err != nil {
+ return err
+ }
+
+ logrus.Debugf("Going to stop container %s exec session %s", ctr.ID(), sessionID)
+
+ // Is the session dead?
+ // Ping the PID with signal 0 to see if it still exists.
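+ // (Signal 0 performs error checking only and delivers no signal;
+ // ESRCH means the process no longer exists.)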
+ if err := unix.Kill(pid, 0); err != nil {
+ if err == unix.ESRCH {
+ return nil
+ }
+ return fmt.Errorf("pinging container %s exec session %s PID %d with signal 0: %w", ctr.ID(), sessionID, pid, err)
+ }
+
+ if timeout > 0 {
+ // Use SIGTERM first, then SIGKILL after the timeout.
+ logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGTERM", sessionID, pid, ctr.ID())
+ if err := unix.Kill(pid, unix.SIGTERM); err != nil {
+ if err == unix.ESRCH {
+ return nil
+ }
+ return fmt.Errorf("killing container %s exec session %s PID %d with SIGTERM: %w", ctr.ID(), sessionID, pid, err)
+ }
+
+ // Wait for the PID to stop
+ if err := waitPidStop(pid, time.Duration(timeout)*time.Second); err != nil {
+ logrus.Infof("Timed out waiting for container %s exec session %s to stop, resorting to SIGKILL: %v", ctr.ID(), sessionID, err)
+ } else {
+ // No error, container is dead
+ return nil
+ }
+ }
+
+ // SIGTERM did not work. On to SIGKILL.
+ logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGKILL", sessionID, pid, ctr.ID())
+ if err := unix.Kill(pid, unix.SIGKILL); err != nil {
+ if err == unix.ESRCH {
+ return nil
+ }
+ return fmt.Errorf("killing container %s exec session %s PID %d with SIGKILL: %w", ctr.ID(), sessionID, pid, err)
+ }
+
+ // Wait for the PID to stop
+ if err := waitPidStop(pid, killContainerTimeout); err != nil {
+ return fmt.Errorf("timed out waiting for container %s exec session %s PID %d to stop after SIGKILL: %w", ctr.ID(), sessionID, pid, err)
+ }
+
+ return nil
+}
+
+// ExecUpdateStatus checks if the given exec session is still running.
+func (r *ConmonOCIRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) {
+ pid, err := ctr.getExecSessionPID(sessionID)
+ if err != nil {
+ return false, err
+ }
+
+ logrus.Debugf("Checking status of container %s exec session %s", ctr.ID(), sessionID)
+
+ // Is the session dead?
+ // Ping the PID with signal 0 to see if it still exists.
+ if err := unix.Kill(pid, 0); err != nil {
+ if err == unix.ESRCH {
+ return false, nil
+ }
+ return false, fmt.Errorf("pinging container %s exec session %s PID %d with signal 0: %w", ctr.ID(), sessionID, pid, err)
+ }
+
+ return true, nil
+}
+
+// ExecAttachSocketPath is the path to a container's exec session attach socket.
+func (r *ConmonOCIRuntime) ExecAttachSocketPath(ctr *Container, sessionID string) (string, error) {
+ // We don't even use the container, so don't bother validating it
+ if sessionID == "" {
+ return "", fmt.Errorf("must provide a valid session ID to get attach socket path: %w", define.ErrInvalidArg)
+ }
+
+ return filepath.Join(ctr.execBundlePath(sessionID), "attach"), nil
+}
+
+// This contains pipes used by the exec API.
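+// The sync pipe carries conmon's reply (the exec PID or an error), the
+// start pipe delays the actual start of the session until cgroup setup is
+// done, and the attach pipe signals that the attach socket is ready.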
+type execPipes struct {
+ syncPipe *os.File
+ syncClosed bool
+ startPipe *os.File
+ startClosed bool
+ attachPipe *os.File
+ attachClosed bool
+}
+
+func (p *execPipes) cleanup() {
+ if p.syncPipe != nil && !p.syncClosed {
+ errorhandling.CloseQuiet(p.syncPipe)
+ p.syncClosed = true
+ }
+ if p.startPipe != nil && !p.startClosed {
+ errorhandling.CloseQuiet(p.startPipe)
+ p.startClosed = true
+ }
+ if p.attachPipe != nil && !p.attachClosed {
+ errorhandling.CloseQuiet(p.attachPipe)
+ p.attachClosed = true
+ }
+}
+
+// Start an exec session's conmon parent from the given options.
+func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *ExecOptions, attachStdin bool, ociLog string) (_ *exec.Cmd, _ *execPipes, deferredErr error) {
+ pipes := new(execPipes)
+
+ if options == nil {
+ return nil, nil, fmt.Errorf("must provide an ExecOptions struct to ExecContainer: %w", define.ErrInvalidArg)
+ }
+ if len(options.Cmd) == 0 {
+ return nil, nil, fmt.Errorf("must provide a command to execute: %w", define.ErrInvalidArg)
+ }
+
+ if sessionID == "" {
+ return nil, nil, fmt.Errorf("must provide a session ID for exec: %w", define.ErrEmptyID)
+ }
+
+ // create sync pipe to receive the pid
+ parentSyncPipe, childSyncPipe, err := newPipe()
+ if err != nil {
+ return nil, nil, fmt.Errorf("creating socket pair: %w", err)
+ }
+ pipes.syncPipe = parentSyncPipe
+
+ defer func() {
+ if deferredErr != nil {
+ pipes.cleanup()
+ }
+ }()
+
+ // create start pipe to set the cgroup before running
+ // attachToExec is responsible for closing parentStartPipe
+ childStartPipe, parentStartPipe, err := newPipe()
+ if err != nil {
+ return nil, nil, fmt.Errorf("creating socket pair: %w", err)
+ }
+ pipes.startPipe = parentStartPipe
+
+ // create the attach pipe to allow the attach socket to be created before
+ // $RUNTIME exec starts running. This makes sure we can capture all output
+ // from the process through that socket, rather than half-reading the log
+ // and half-attaching to the socket.
+ // attachToExec is responsible for closing parentAttachPipe
+ parentAttachPipe, childAttachPipe, err := newPipe()
+ if err != nil {
+ return nil, nil, fmt.Errorf("creating socket pair: %w", err)
+ }
+ pipes.attachPipe = parentAttachPipe
+
+ childrenClosed := false
+ defer func() {
+ if !childrenClosed {
+ errorhandling.CloseQuiet(childSyncPipe)
+ errorhandling.CloseQuiet(childAttachPipe)
+ errorhandling.CloseQuiet(childStartPipe)
+ }
+ }()
+
+ runtimeDir, err := util.GetRuntimeDir()
+ if err != nil {
+ return nil, nil, err
+ }
+
+ finalEnv := make([]string, 0, len(options.Env))
+ for k, v := range options.Env {
+ finalEnv = append(finalEnv, fmt.Sprintf("%s=%s", k, v))
+ }
+
+ processFile, err := c.prepareProcessExec(options, finalEnv, sessionID)
+ if err != nil {
+ return nil, nil, err
+ }
+ defer processFile.Close()
+
+ args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
+
+ if options.PreserveFDs > 0 {
+ args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", options.PreserveFDs))...)
+ }
+
+ if options.Terminal {
+ args = append(args, "-t")
+ }
+
+ if attachStdin {
+ args = append(args, "-i")
+ }
+
+ // Mark this as an exec session and pass the process spec
+ args = append(args, "-e")
+ // TODO make this optional when we can detach
+ args = append(args, "--exec-attach")
+ args = append(args, "--exec-process-spec", processFile.Name())
+
+ if len(options.ExitCommand) > 0 {
+ args = append(args, "--exit-command", options.ExitCommand[0])
+ for _, arg := range options.ExitCommand[1:] {
+ args = append(args, []string{"--exit-command-arg", arg}...)
+ }
+ if options.ExitCommandDelay > 0 {
+ args = append(args, []string{"--exit-delay", fmt.Sprintf("%d", options.ExitCommandDelay)}...)
+ }
+ }
+
+ logrus.WithFields(logrus.Fields{
+ "args": args,
+ }).Debugf("running conmon: %s", r.conmonPath)
+ execCmd := exec.Command(r.conmonPath, args...)
+
+ // TODO: This is commented out because it doesn't make much sense for
+ // HTTP attach, and I'm not certain it does for non-HTTP attach either.
+ // if streams != nil {
+ // // Don't add the InputStream to the execCmd. Instead, the data should be passed
+ // // through CopyDetachable
+ // if streams.AttachOutput {
+ // execCmd.Stdout = options.Streams.OutputStream
+ // }
+ // if streams.AttachError {
+ // execCmd.Stderr = options.Streams.ErrorStream
+ // }
+ // }
+
+ conmonEnv := r.configureConmonEnv(runtimeDir)
+
+ var filesToClose []*os.File
+ if options.PreserveFDs > 0 {
+ for fd := 3; fd < int(3+options.PreserveFDs); fd++ {
+ f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
+ filesToClose = append(filesToClose, f)
+ execCmd.ExtraFiles = append(execCmd.ExtraFiles, f)
+ }
+ }
+
+ // We don't want to step on the fds the user asked us to preserve.
+ // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
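+ // (os/exec maps ExtraFiles[i] to fd 3+i in the child, so the preserved
+ // fds occupy 3..preserveFDs+2 and the three pipes appended below land
+ // at preserveFDs+3, +4, and +5, matching the _OCI_* values.)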
+ execCmd.Env = r.conmonEnv
+ execCmd.Env = append(execCmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", options.PreserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", options.PreserveFDs+4), fmt.Sprintf("_OCI_ATTACHPIPE=%d", options.PreserveFDs+5))
+ execCmd.Env = append(execCmd.Env, conmonEnv...)
+
+ execCmd.ExtraFiles = append(execCmd.ExtraFiles, childSyncPipe, childStartPipe, childAttachPipe)
+ execCmd.Dir = c.execBundlePath(sessionID)
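+ // Setpgid gives conmon its own process group, so terminal-generated
+ // signals aimed at podman's process group do not reach it.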
+ execCmd.SysProcAttr = &syscall.SysProcAttr{
+ Setpgid: true,
+ }
+
+ err = startCommand(execCmd, c)
+
+ // We don't need the child ends of the pipes on the parent side
+ errorhandling.CloseQuiet(childSyncPipe)
+ errorhandling.CloseQuiet(childAttachPipe)
+ errorhandling.CloseQuiet(childStartPipe)
+ childrenClosed = true
+
+ if err != nil {
+ return nil, nil, fmt.Errorf("cannot start container %s: %w", c.ID(), err)
+ }
+ if err := r.moveConmonToCgroupAndSignal(c, execCmd, parentStartPipe); err != nil {
+ return nil, nil, err
+ }
+
+ // These fds were passed down to the runtime. Close them
+ // so we don't interfere.
+ for _, f := range filesToClose {
+ errorhandling.CloseQuiet(f)
+ }
+
+ return execCmd, pipes, nil
+}
+
+// Attach to a container over HTTP
+func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string, newSize *resize.TerminalSize, runtimeName string) (deferredErr error) {
+ // NOTE: As you may notice, the attach code is quite complex.
+ // Many things happen concurrently and yet are interdependent.
+ // If you ever change this function, make sure to write to the
+ // conmonPipeDataChan in case of an error.
+
+ if pipes == nil || pipes.startPipe == nil || pipes.attachPipe == nil {
+ err := fmt.Errorf("must provide a start and attach pipe to finish an exec attach: %w", define.ErrInvalidArg)
+ conmonPipeDataChan <- conmonPipeData{-1, err}
+ return err
+ }
+
+ defer func() {
+ if !pipes.startClosed {
+ errorhandling.CloseQuiet(pipes.startPipe)
+ pipes.startClosed = true
+ }
+ if !pipes.attachClosed {
+ errorhandling.CloseQuiet(pipes.attachPipe)
+ pipes.attachClosed = true
+ }
+ }()
+
+ logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
+
+ // 1: set up the socket path, such that it is the correct length and location for exec
+ sockPath, err := c.execAttachSocketPath(sessionID)
+ if err != nil {
+ conmonPipeDataChan <- conmonPipeData{-1, err}
+ return err
+ }
+
+ // 2: read from attachFd that the parent process has set up the console socket
+ if _, err := readConmonPipeData(runtimeName, pipes.attachPipe, ""); err != nil {
+ conmonPipeDataChan <- conmonPipeData{-1, err}
+ return err
+ }
+
+ // resize before we start the container process
+ if newSize != nil {
+ err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
+ if err != nil {
+ logrus.Warnf("Resize failed: %v", err)
+ }
+ }
+
+ // 3: then attach
+ conn, err := openUnixSocket(sockPath)
+ if err != nil {
+ conmonPipeDataChan <- conmonPipeData{-1, err}
+ return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err)
+ }
+ defer func() {
+ if err := conn.Close(); err != nil {
+ logrus.Errorf("Unable to close socket: %q", err)
+ }
+ }()
+
+ attachStdout := true
+ attachStderr := true
+ attachStdin := true
+ if streams != nil {
+ attachStdout = streams.Stdout
+ attachStderr = streams.Stderr
+ attachStdin = streams.Stdin
+ }
+
+ // Perform hijack
+ hijacker, ok := w.(http.Hijacker)
+ if !ok {
+ err := errors.New("unable to hijack connection")
+ conmonPipeDataChan <- conmonPipeData{-1, err}
+ return err
+ }
+
+ httpCon, httpBuf, err := hijacker.Hijack()
+ if err != nil {
+ conmonPipeDataChan <- conmonPipeData{-1, err}
+ return fmt.Errorf("hijacking connection: %w", err)
+ }
+
+ hijackDone <- true
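+ // From here on the connection is a raw stream; errors must be written
+ // to httpBuf directly, as the ResponseWriter is no longer usable.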
+
+ // Write a header to let the client know what happened
+ writeHijackHeader(r, httpBuf)
+
+ // Force a flush after the header is written.
+ if err := httpBuf.Flush(); err != nil {
+ conmonPipeDataChan <- conmonPipeData{-1, err}
+ return fmt.Errorf("flushing HTTP hijack header: %w", err)
+ }
+
+ go func() {
+ // Wait for conmon to exit successfully before reading the PID.
+ if err := execCmd.Wait(); err != nil {
+ conmonPipeDataChan <- conmonPipeData{-1, err}
+ } else {
+ pid, err := readConmonPipeData(runtimeName, pipes.syncPipe, ociLog)
+ if err != nil {
+ hijackWriteError(err, c.ID(), isTerminal, httpBuf)
+ conmonPipeDataChan <- conmonPipeData{pid, err}
+ } else {
+ conmonPipeDataChan <- conmonPipeData{pid, err}
+ }
+ }
+ // We need to hold the connection open until the complete exec
+ // function has finished. This channel will be closed in a defer
+ // in that function, so we can wait for it here.
+ // Can't be a defer, because this would block the function from
+ // returning.
+ <-holdConnOpen
+ hijackWriteErrorAndClose(deferredErr, c.ID(), isTerminal, httpCon, httpBuf)
+ }()
+
+ stdoutChan := make(chan error)
+ stdinChan := make(chan error)
+
+ // Next, STDIN. Avoid entirely if attachStdin unset.
+ if attachStdin {
+ go func() {
+ logrus.Debugf("Beginning STDIN copy")
+ _, err := cutil.CopyDetachable(conn, httpBuf, detachKeys)
+ logrus.Debugf("STDIN copy completed")
+ stdinChan <- err
+ }()
+ }
+
+ // 4: send start message to child
+ if err := writeConmonPipeData(pipes.startPipe); err != nil {
+ return err
+ }
+
+ // Handle STDOUT/STDERR *after* start message is sent
+ go func() {
+ var err error
+ if isTerminal {
+ // Hack: return immediately if attachStdout is not set,
+ // to emulate Docker.
+ // When a terminal is in use, STDERR goes nowhere:
+ // everything goes over STDOUT.
+ // Therefore, if we're not attaching STDOUT, we'll never
+ // copy anything from here.
+ logrus.Debugf("Performing terminal HTTP attach for container %s", c.ID())
+ if attachStdout {
+ err = httpAttachTerminalCopy(conn, httpBuf, c.ID())
+ }
+ } else {
+ logrus.Debugf("Performing non-terminal HTTP attach for container %s", c.ID())
+ err = httpAttachNonTerminalCopy(conn, httpBuf, c.ID(), attachStdin, attachStdout, attachStderr)
+ }
+ stdoutChan <- err
+ logrus.Debugf("STDOUT/ERR copy completed")
+ }()
+
+ for {
+ select {
+ case err := <-stdoutChan:
+ if err != nil {
+ return err
+ }
+
+ return nil
+ case err := <-stdinChan:
+ if err != nil {
+ return err
+ }
+ // STDIN copy is done; close the socket's write side
+ if connErr := socketCloseWrite(conn); connErr != nil {
+ logrus.Errorf("Unable to close conn: %v", connErr)
+ }
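+ // Note: no return here; keep looping until the STDOUT/STDERR copy
+ // finishes or the attach is canceled.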
+ case <-cancel:
+ return nil
+ }
+ }
+}
+
+// prepareProcessExec writes the process.json used in runc exec -p and
+// returns the open *os.File; the caller is responsible for closing it.
+func (c *Container) prepareProcessExec(options *ExecOptions, env []string, sessionID string) (*os.File, error) {
+ f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-")
+ if err != nil {
+ return nil, err
+ }
+ pspec := new(spec.Process)
+ if err := JSONDeepCopy(c.config.Spec.Process, pspec); err != nil {
+ return nil, err
+ }
+ pspec.SelinuxLabel = c.config.ProcessLabel
+ pspec.Args = options.Cmd
+
+ // We need to default this to false, else it will inherit Terminal as
+ // true from the container.
+ pspec.Terminal = false
+ if options.Terminal {
+ pspec.Terminal = true
+ }
+ if len(env) > 0 {
+ pspec.Env = append(pspec.Env, env...)
+ }
+
+ // Add secret envs if they exist
+ manager, err := c.runtime.SecretsManager()
+ if err != nil {
+ return nil, err
+ }
+ for name, secr := range c.config.EnvSecrets {
+ _, data, err := manager.LookupSecretData(secr.Name)
+ if err != nil {
+ return nil, err
+ }
+ pspec.Env = append(pspec.Env, fmt.Sprintf("%s=%s", name, string(data)))
+ }
+
+ if options.Cwd != "" {
+ pspec.Cwd = options.Cwd
+ }
+
+ var addGroups []string
+ var sgids []uint32
+
+ // if the user is empty, we should inherit the user that the container is currently running with
+ user := options.User
+ if user == "" {
+ logrus.Debugf("Set user to %s", c.config.User)
+ user = c.config.User
+ addGroups = c.config.Groups
+ }
+
+ overrides := c.getUserOverrides()
+ execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, overrides)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(addGroups) > 0 {
+ sgids, err = lookup.GetContainerGroups(addGroups, c.state.Mountpoint, overrides)
+ if err != nil {
+ return nil, fmt.Errorf("looking up supplemental groups for container %s exec session %s: %w", c.ID(), sessionID, err)
+ }
+ }
+
+ // If user was set, look it up in the container to get a UID to use on
+ // the host
+ if user != "" || len(sgids) > 0 {
+ if user != "" {
+ for _, sgid := range execUser.Sgids {
+ sgids = append(sgids, uint32(sgid))
+ }
+ }
+ processUser := spec.User{
+ UID: uint32(execUser.Uid),
+ GID: uint32(execUser.Gid),
+ AdditionalGids: sgids,
+ }
+
+ pspec.User = processUser
+ }
+
+ if err := c.setProcessCapabilitiesExec(options, user, execUser, pspec); err != nil {
+ return nil, err
+ }
+
+ hasHomeSet := false
+ for _, s := range pspec.Env {
+ if strings.HasPrefix(s, "HOME=") {
+ hasHomeSet = true
+ break
+ }
+ }
+ if !hasHomeSet {
+ pspec.Env = append(pspec.Env, fmt.Sprintf("HOME=%s", execUser.Home))
+ }
+
+ processJSON, err := json.Marshal(pspec)
+ if err != nil {
+ return nil, err
+ }
+
+ if err := ioutil.WriteFile(f.Name(), processJSON, 0644); err != nil {
+ return nil, err
+ }
+ return f, nil
+}
diff --git a/libpod/oci_conmon_exec_freebsd.go b/libpod/oci_conmon_exec_freebsd.go
new file mode 100644
index 000000000..bf30404a1
--- /dev/null
+++ b/libpod/oci_conmon_exec_freebsd.go
@@ -0,0 +1,10 @@
+package libpod
+
+import (
+ "github.com/opencontainers/runc/libcontainer/user"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func (c *Container) setProcessCapabilitiesExec(options *ExecOptions, user string, execUser *user.ExecUser, pspec *spec.Process) error {
+ return nil
+}
diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go
index 16cd7ef9f..617e8d601 100644
--- a/libpod/oci_conmon_exec_linux.go
+++ b/libpod/oci_conmon_exec_linux.go
@@ -1,758 +1,20 @@
package libpod
import (
- "errors"
- "fmt"
- "io/ioutil"
- "net/http"
- "os"
- "os/exec"
- "path/filepath"
- "strings"
- "syscall"
- "time"
-
"github.com/containers/common/pkg/capabilities"
- "github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/resize"
- cutil "github.com/containers/common/pkg/util"
- "github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/pkg/errorhandling"
- "github.com/containers/podman/v4/pkg/lookup"
- "github.com/containers/podman/v4/pkg/util"
+ "github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/sirupsen/logrus"
- "golang.org/x/sys/unix"
)
-// ExecContainer executes a command in a running container
-func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions, streams *define.AttachStreams, newSize *resize.TerminalSize) (int, chan error, error) {
- if options == nil {
- return -1, nil, fmt.Errorf("must provide an ExecOptions struct to ExecContainer: %w", define.ErrInvalidArg)
- }
- if len(options.Cmd) == 0 {
- return -1, nil, fmt.Errorf("must provide a command to execute: %w", define.ErrInvalidArg)
- }
-
- if sessionID == "" {
- return -1, nil, fmt.Errorf("must provide a session ID for exec: %w", define.ErrEmptyID)
- }
-
- // TODO: Should we default this to false?
- // Or maybe make streams mandatory?
- attachStdin := true
- if streams != nil {
- attachStdin = streams.AttachInput
- }
-
- var ociLog string
- if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
- ociLog = c.execOCILog(sessionID)
- }
-
- execCmd, pipes, err := r.startExec(c, sessionID, options, attachStdin, ociLog)
- if err != nil {
- return -1, nil, err
- }
-
- // Only close sync pipe. Start and attach are consumed in the attach
- // goroutine.
- defer func() {
- if pipes.syncPipe != nil && !pipes.syncClosed {
- errorhandling.CloseQuiet(pipes.syncPipe)
- pipes.syncClosed = true
- }
- }()
-
- // TODO Only create if !detach
- // Attach to the container before starting it
- attachChan := make(chan error)
- go func() {
- // attachToExec is responsible for closing pipes
- attachChan <- c.attachToExec(streams, options.DetachKeys, sessionID, pipes.startPipe, pipes.attachPipe, newSize)
- close(attachChan)
- }()
-
- if err := execCmd.Wait(); err != nil {
- return -1, nil, fmt.Errorf("cannot run conmon: %w", err)
- }
-
- pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog)
-
- return pid, attachChan, err
-}
-
-// ExecContainerHTTP executes a new command in an existing container and
-// forwards its standard streams over an attach
-func (r *ConmonOCIRuntime) ExecContainerHTTP(ctr *Container, sessionID string, options *ExecOptions, req *http.Request, w http.ResponseWriter,
- streams *HTTPAttachStreams, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, newSize *resize.TerminalSize) (int, chan error, error) {
- if streams != nil {
- if !streams.Stdin && !streams.Stdout && !streams.Stderr {
- return -1, nil, fmt.Errorf("must provide at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- }
-
- if options == nil {
- return -1, nil, fmt.Errorf("must provide exec options to ExecContainerHTTP: %w", define.ErrInvalidArg)
- }
-
- detachString := config.DefaultDetachKeys
- if options.DetachKeys != nil {
- detachString = *options.DetachKeys
- }
- detachKeys, err := processDetachKeys(detachString)
- if err != nil {
- return -1, nil, err
- }
-
- // TODO: Should we default this to false?
- // Or maybe make streams mandatory?
- attachStdin := true
- if streams != nil {
- attachStdin = streams.Stdin
- }
-
- var ociLog string
- if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
- ociLog = ctr.execOCILog(sessionID)
- }
-
- execCmd, pipes, err := r.startExec(ctr, sessionID, options, attachStdin, ociLog)
- if err != nil {
- return -1, nil, err
- }
-
- // Only close sync pipe. Start and attach are consumed in the attach
- // goroutine.
- defer func() {
- if pipes.syncPipe != nil && !pipes.syncClosed {
- errorhandling.CloseQuiet(pipes.syncPipe)
- pipes.syncClosed = true
- }
- }()
-
- attachChan := make(chan error)
- conmonPipeDataChan := make(chan conmonPipeData)
- go func() {
- // attachToExec is responsible for closing pipes
- attachChan <- attachExecHTTP(ctr, sessionID, req, w, streams, pipes, detachKeys, options.Terminal, cancel, hijackDone, holdConnOpen, execCmd, conmonPipeDataChan, ociLog, newSize, r.name)
- close(attachChan)
- }()
-
- // NOTE: the channel is needed to communicate conmon's data. In case
- // of an error, the error will be written on the hijacked http
- // connection such that remote clients will receive the error.
- pipeData := <-conmonPipeDataChan
-
- return pipeData.pid, attachChan, pipeData.err
-}
-
-// conmonPipeData contains the data when reading from conmon's pipe.
-type conmonPipeData struct {
- pid int
- err error
-}
-
-// ExecContainerDetached executes a command in a running container, but does
-// not attach to it.
-func (r *ConmonOCIRuntime) ExecContainerDetached(ctr *Container, sessionID string, options *ExecOptions, stdin bool) (int, error) {
- if options == nil {
- return -1, fmt.Errorf("must provide exec options to ExecContainerHTTP: %w", define.ErrInvalidArg)
- }
-
- var ociLog string
- if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
- ociLog = ctr.execOCILog(sessionID)
- }
-
- execCmd, pipes, err := r.startExec(ctr, sessionID, options, stdin, ociLog)
- if err != nil {
- return -1, err
- }
-
- defer func() {
- pipes.cleanup()
- }()
-
- // Wait for Conmon to tell us we're ready to attach.
- // We aren't actually *going* to attach, but this means that we're good
- // to proceed.
- if _, err := readConmonPipeData(r.name, pipes.attachPipe, ""); err != nil {
- return -1, err
- }
-
- // Start the exec session
- if err := writeConmonPipeData(pipes.startPipe); err != nil {
- return -1, err
- }
-
- // Wait for conmon to succeed, when return.
- if err := execCmd.Wait(); err != nil {
- return -1, fmt.Errorf("cannot run conmon: %w", err)
- }
-
- pid, err := readConmonPipeData(r.name, pipes.syncPipe, ociLog)
-
- return pid, err
-}
-
-// ExecAttachResize resizes the TTY of the given exec session.
-func (r *ConmonOCIRuntime) ExecAttachResize(ctr *Container, sessionID string, newSize resize.TerminalSize) error {
- controlFile, err := openControlFile(ctr, ctr.execBundlePath(sessionID))
- if err != nil {
- return err
- }
- defer controlFile.Close()
-
- if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil {
- return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err)
- }
-
- return nil
-}
-
-// ExecStopContainer stops a given exec session in a running container.
-func (r *ConmonOCIRuntime) ExecStopContainer(ctr *Container, sessionID string, timeout uint) error {
- pid, err := ctr.getExecSessionPID(sessionID)
- if err != nil {
- return err
- }
-
- logrus.Debugf("Going to stop container %s exec session %s", ctr.ID(), sessionID)
-
- // Is the session dead?
- // Ping the PID with signal 0 to see if it still exists.
- if err := unix.Kill(pid, 0); err != nil {
- if err == unix.ESRCH {
- return nil
- }
- return fmt.Errorf("error pinging container %s exec session %s PID %d with signal 0: %w", ctr.ID(), sessionID, pid, err)
- }
-
- if timeout > 0 {
- // Use SIGTERM by default, then SIGSTOP after timeout.
- logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGTERM", sessionID, pid, ctr.ID())
- if err := unix.Kill(pid, unix.SIGTERM); err != nil {
- if err == unix.ESRCH {
- return nil
- }
- return fmt.Errorf("error killing container %s exec session %s PID %d with SIGTERM: %w", ctr.ID(), sessionID, pid, err)
- }
-
- // Wait for the PID to stop
- if err := waitPidStop(pid, time.Duration(timeout)*time.Second); err != nil {
- logrus.Infof("Timed out waiting for container %s exec session %s to stop, resorting to SIGKILL: %v", ctr.ID(), sessionID, err)
- } else {
- // No error, container is dead
- return nil
- }
- }
-
- // SIGTERM did not work. On to SIGKILL.
- logrus.Debugf("Killing exec session %s (PID %d) of container %s with SIGKILL", sessionID, pid, ctr.ID())
- if err := unix.Kill(pid, unix.SIGTERM); err != nil {
- if err == unix.ESRCH {
- return nil
- }
- return fmt.Errorf("error killing container %s exec session %s PID %d with SIGKILL: %w", ctr.ID(), sessionID, pid, err)
- }
-
- // Wait for the PID to stop
- if err := waitPidStop(pid, killContainerTimeout); err != nil {
- return fmt.Errorf("timed out waiting for container %s exec session %s PID %d to stop after SIGKILL: %w", ctr.ID(), sessionID, pid, err)
- }
-
- return nil
-}
-
-// ExecUpdateStatus checks if the given exec session is still running.
-func (r *ConmonOCIRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) {
- pid, err := ctr.getExecSessionPID(sessionID)
- if err != nil {
- return false, err
- }
-
- logrus.Debugf("Checking status of container %s exec session %s", ctr.ID(), sessionID)
-
- // Is the session dead?
- // Ping the PID with signal 0 to see if it still exists.
- if err := unix.Kill(pid, 0); err != nil {
- if err == unix.ESRCH {
- return false, nil
- }
- return false, fmt.Errorf("error pinging container %s exec session %s PID %d with signal 0: %w", ctr.ID(), sessionID, pid, err)
- }
-
- return true, nil
-}
-
-// ExecAttachSocketPath is the path to a container's exec session attach socket.
-func (r *ConmonOCIRuntime) ExecAttachSocketPath(ctr *Container, sessionID string) (string, error) {
- // We don't even use container, so don't validity check it
- if sessionID == "" {
- return "", fmt.Errorf("must provide a valid session ID to get attach socket path: %w", define.ErrInvalidArg)
- }
-
- return filepath.Join(ctr.execBundlePath(sessionID), "attach"), nil
-}
-
-// This contains pipes used by the exec API.
-type execPipes struct {
- syncPipe *os.File
- syncClosed bool
- startPipe *os.File
- startClosed bool
- attachPipe *os.File
- attachClosed bool
-}
-
-func (p *execPipes) cleanup() {
- if p.syncPipe != nil && !p.syncClosed {
- errorhandling.CloseQuiet(p.syncPipe)
- p.syncClosed = true
- }
- if p.startPipe != nil && !p.startClosed {
- errorhandling.CloseQuiet(p.startPipe)
- p.startClosed = true
- }
- if p.attachPipe != nil && !p.attachClosed {
- errorhandling.CloseQuiet(p.attachPipe)
- p.attachClosed = true
- }
-}
-
-// Start an exec session's conmon parent from the given options.
-func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *ExecOptions, attachStdin bool, ociLog string) (_ *exec.Cmd, _ *execPipes, deferredErr error) {
- pipes := new(execPipes)
-
- if options == nil {
- return nil, nil, fmt.Errorf("must provide an ExecOptions struct to ExecContainer: %w", define.ErrInvalidArg)
- }
- if len(options.Cmd) == 0 {
- return nil, nil, fmt.Errorf("must provide a command to execute: %w", define.ErrInvalidArg)
- }
-
- if sessionID == "" {
- return nil, nil, fmt.Errorf("must provide a session ID for exec: %w", define.ErrEmptyID)
- }
-
- // create sync pipe to receive the pid
- parentSyncPipe, childSyncPipe, err := newPipe()
- if err != nil {
- return nil, nil, fmt.Errorf("error creating socket pair: %w", err)
- }
- pipes.syncPipe = parentSyncPipe
-
- defer func() {
- if deferredErr != nil {
- pipes.cleanup()
- }
- }()
-
- // create start pipe to set the cgroup before running
- // attachToExec is responsible for closing parentStartPipe
- childStartPipe, parentStartPipe, err := newPipe()
- if err != nil {
- return nil, nil, fmt.Errorf("error creating socket pair: %w", err)
- }
- pipes.startPipe = parentStartPipe
-
- // create the attach pipe to allow attach socket to be created before
- // $RUNTIME exec starts running. This is to make sure we can capture all output
- // from the process through that socket, rather than half reading the log, half attaching to the socket
- // attachToExec is responsible for closing parentAttachPipe
- parentAttachPipe, childAttachPipe, err := newPipe()
- if err != nil {
- return nil, nil, fmt.Errorf("error creating socket pair: %w", err)
- }
- pipes.attachPipe = parentAttachPipe
-
- childrenClosed := false
- defer func() {
- if !childrenClosed {
- errorhandling.CloseQuiet(childSyncPipe)
- errorhandling.CloseQuiet(childAttachPipe)
- errorhandling.CloseQuiet(childStartPipe)
- }
- }()
-
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return nil, nil, err
- }
-
- finalEnv := make([]string, 0, len(options.Env))
- for k, v := range options.Env {
- finalEnv = append(finalEnv, fmt.Sprintf("%s=%s", k, v))
- }
-
- processFile, err := prepareProcessExec(c, options, finalEnv, sessionID)
- if err != nil {
- return nil, nil, err
- }
- defer processFile.Close()
-
- args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
-
- if options.PreserveFDs > 0 {
- args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", options.PreserveFDs))...)
- }
-
- if options.Terminal {
- args = append(args, "-t")
- }
-
- if attachStdin {
- args = append(args, "-i")
- }
-
- // Append container ID and command
- args = append(args, "-e")
- // TODO make this optional when we can detach
- args = append(args, "--exec-attach")
- args = append(args, "--exec-process-spec", processFile.Name())
-
- if len(options.ExitCommand) > 0 {
- args = append(args, "--exit-command", options.ExitCommand[0])
- for _, arg := range options.ExitCommand[1:] {
- args = append(args, []string{"--exit-command-arg", arg}...)
- }
- if options.ExitCommandDelay > 0 {
- args = append(args, []string{"--exit-delay", fmt.Sprintf("%d", options.ExitCommandDelay)}...)
- }
- }
-
- logrus.WithFields(logrus.Fields{
- "args": args,
- }).Debugf("running conmon: %s", r.conmonPath)
- execCmd := exec.Command(r.conmonPath, args...)
-
- // TODO: This is commented because it doesn't make much sense in HTTP
- // attach, and I'm not certain it does for non-HTTP attach as well.
- // if streams != nil {
- // // Don't add the InputStream to the execCmd. Instead, the data should be passed
- // // through CopyDetachable
- // if streams.AttachOutput {
- // execCmd.Stdout = options.Streams.OutputStream
- // }
- // if streams.AttachError {
- // execCmd.Stderr = options.Streams.ErrorStream
- // }
- // }
-
- conmonEnv := r.configureConmonEnv(runtimeDir)
-
- var filesToClose []*os.File
- if options.PreserveFDs > 0 {
- for fd := 3; fd < int(3+options.PreserveFDs); fd++ {
- f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
- filesToClose = append(filesToClose, f)
- execCmd.ExtraFiles = append(execCmd.ExtraFiles, f)
- }
- }
-
- // we don't want to step on users fds they asked to preserve
- // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
- execCmd.Env = r.conmonEnv
- execCmd.Env = append(execCmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", options.PreserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", options.PreserveFDs+4), fmt.Sprintf("_OCI_ATTACHPIPE=%d", options.PreserveFDs+5))
- execCmd.Env = append(execCmd.Env, conmonEnv...)
-
- execCmd.ExtraFiles = append(execCmd.ExtraFiles, childSyncPipe, childStartPipe, childAttachPipe)
- execCmd.Dir = c.execBundlePath(sessionID)
- execCmd.SysProcAttr = &syscall.SysProcAttr{
- Setpgid: true,
- }
-
- err = startCommand(execCmd, c)
-
- // We don't need children pipes on the parent side
- errorhandling.CloseQuiet(childSyncPipe)
- errorhandling.CloseQuiet(childAttachPipe)
- errorhandling.CloseQuiet(childStartPipe)
- childrenClosed = true
-
- if err != nil {
- return nil, nil, fmt.Errorf("cannot start container %s: %w", c.ID(), err)
- }
- if err := r.moveConmonToCgroupAndSignal(c, execCmd, parentStartPipe); err != nil {
- return nil, nil, err
- }
-
- // These fds were passed down to the runtime. Close them
- // and not interfere
- for _, f := range filesToClose {
- errorhandling.CloseQuiet(f)
- }
-
- return execCmd, pipes, nil
-}
-
-// Attach to a container over HTTP
-func attachExecHTTP(c *Container, sessionID string, r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, pipes *execPipes, detachKeys []byte, isTerminal bool, cancel <-chan bool, hijackDone chan<- bool, holdConnOpen <-chan bool, execCmd *exec.Cmd, conmonPipeDataChan chan<- conmonPipeData, ociLog string, newSize *resize.TerminalSize, runtimeName string) (deferredErr error) {
- // NOTE: As you may notice, the attach code is quite complex.
- // Many things happen concurrently and yet are interdependent.
- // If you ever change this function, make sure to write to the
- // conmonPipeDataChan in case of an error.
-
- if pipes == nil || pipes.startPipe == nil || pipes.attachPipe == nil {
- err := fmt.Errorf("must provide a start and attach pipe to finish an exec attach: %w", define.ErrInvalidArg)
- conmonPipeDataChan <- conmonPipeData{-1, err}
- return err
- }
-
- defer func() {
- if !pipes.startClosed {
- errorhandling.CloseQuiet(pipes.startPipe)
- pipes.startClosed = true
- }
- if !pipes.attachClosed {
- errorhandling.CloseQuiet(pipes.attachPipe)
- pipes.attachClosed = true
- }
- }()
-
- logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
-
- // set up the socket path, such that it is the correct length and location for exec
- sockPath, err := c.execAttachSocketPath(sessionID)
- if err != nil {
- conmonPipeDataChan <- conmonPipeData{-1, err}
- return err
- }
-
- // 2: read from attachFd that the parent process has set up the console socket
- if _, err := readConmonPipeData(runtimeName, pipes.attachPipe, ""); err != nil {
- conmonPipeDataChan <- conmonPipeData{-1, err}
- return err
- }
-
- // resize before we start the container process
- if newSize != nil {
- err = c.ociRuntime.ExecAttachResize(c, sessionID, *newSize)
- if err != nil {
- logrus.Warnf("Resize failed: %v", err)
- }
- }
-
- // 2: then attach
- conn, err := openUnixSocket(sockPath)
- if err != nil {
- conmonPipeDataChan <- conmonPipeData{-1, err}
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", sockPath, err)
- }
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("Unable to close socket: %q", err)
- }
- }()
-
- attachStdout := true
- attachStderr := true
- attachStdin := true
- if streams != nil {
- attachStdout = streams.Stdout
- attachStderr = streams.Stderr
- attachStdin = streams.Stdin
- }
-
- // Perform hijack
- hijacker, ok := w.(http.Hijacker)
- if !ok {
- conmonPipeDataChan <- conmonPipeData{-1, err}
- return errors.New("unable to hijack connection")
- }
-
- httpCon, httpBuf, err := hijacker.Hijack()
- if err != nil {
- conmonPipeDataChan <- conmonPipeData{-1, err}
- return fmt.Errorf("error hijacking connection: %w", err)
- }
-
- hijackDone <- true
-
- // Write a header to let the client know what happened
- writeHijackHeader(r, httpBuf)
-
- // Force a flush after the header is written.
- if err := httpBuf.Flush(); err != nil {
- conmonPipeDataChan <- conmonPipeData{-1, err}
- return fmt.Errorf("error flushing HTTP hijack header: %w", err)
- }
-
- go func() {
- // Wait for conmon to succeed, when return.
- if err := execCmd.Wait(); err != nil {
- conmonPipeDataChan <- conmonPipeData{-1, err}
- } else {
- pid, err := readConmonPipeData(runtimeName, pipes.syncPipe, ociLog)
- if err != nil {
- hijackWriteError(err, c.ID(), isTerminal, httpBuf)
- conmonPipeDataChan <- conmonPipeData{pid, err}
- } else {
- conmonPipeDataChan <- conmonPipeData{pid, err}
- }
- }
- // We need to hold the connection open until the complete exec
- // function has finished. This channel will be closed in a defer
- // in that function, so we can wait for it here.
- // Can't be a defer, because this would block the function from
- // returning.
- <-holdConnOpen
- hijackWriteErrorAndClose(deferredErr, c.ID(), isTerminal, httpCon, httpBuf)
- }()
-
- stdoutChan := make(chan error)
- stdinChan := make(chan error)
-
- // Next, STDIN. Avoid entirely if attachStdin unset.
- if attachStdin {
- go func() {
- logrus.Debugf("Beginning STDIN copy")
- _, err := cutil.CopyDetachable(conn, httpBuf, detachKeys)
- logrus.Debugf("STDIN copy completed")
- stdinChan <- err
- }()
- }
-
- // 4: send start message to child
- if err := writeConmonPipeData(pipes.startPipe); err != nil {
- return err
- }
-
- // Handle STDOUT/STDERR *after* start message is sent
- go func() {
- var err error
- if isTerminal {
- // Hack: return immediately if attachStdout not set to
- // emulate Docker.
- // Basically, when terminal is set, STDERR goes nowhere.
- // Everything does over STDOUT.
- // Therefore, if not attaching STDOUT - we'll never copy
- // anything from here.
- logrus.Debugf("Performing terminal HTTP attach for container %s", c.ID())
- if attachStdout {
- err = httpAttachTerminalCopy(conn, httpBuf, c.ID())
- }
- } else {
- logrus.Debugf("Performing non-terminal HTTP attach for container %s", c.ID())
- err = httpAttachNonTerminalCopy(conn, httpBuf, c.ID(), attachStdin, attachStdout, attachStderr)
- }
- stdoutChan <- err
- logrus.Debugf("STDOUT/ERR copy completed")
- }()
-
- for {
- select {
- case err := <-stdoutChan:
- if err != nil {
- return err
- }
-
- return nil
- case err := <-stdinChan:
- if err != nil {
- return err
- }
- // copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
- logrus.Errorf("Unable to close conn: %v", connErr)
- }
- case <-cancel:
- return nil
- }
- }
-}
-
-// prepareProcessExec returns the path of the process.json used in runc exec -p
-// caller is responsible to close the returned *os.File if needed.
-func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessionID string) (*os.File, error) {
- f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-")
- if err != nil {
- return nil, err
- }
- pspec := new(spec.Process)
- if err := JSONDeepCopy(c.config.Spec.Process, pspec); err != nil {
- return nil, err
- }
- pspec.SelinuxLabel = c.config.ProcessLabel
- pspec.Args = options.Cmd
-
- // We need to default this to false else it will inherit terminal as true
- // from the container.
- pspec.Terminal = false
- if options.Terminal {
- pspec.Terminal = true
- }
- if len(env) > 0 {
- pspec.Env = append(pspec.Env, env...)
- }
-
- // Add secret envs if they exist
- manager, err := c.runtime.SecretsManager()
- if err != nil {
- return nil, err
- }
- for name, secr := range c.config.EnvSecrets {
- _, data, err := manager.LookupSecretData(secr.Name)
- if err != nil {
- return nil, err
- }
- pspec.Env = append(pspec.Env, fmt.Sprintf("%s=%s", name, string(data)))
- }
-
- if options.Cwd != "" {
- pspec.Cwd = options.Cwd
- }
-
- var addGroups []string
- var sgids []uint32
-
- // if the user is empty, we should inherit the user that the container is currently running with
- user := options.User
- if user == "" {
- logrus.Debugf("Set user to %s", c.config.User)
- user = c.config.User
- addGroups = c.config.Groups
- }
-
- overrides := c.getUserOverrides()
- execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, overrides)
- if err != nil {
- return nil, err
- }
-
- if len(addGroups) > 0 {
- sgids, err = lookup.GetContainerGroups(addGroups, c.state.Mountpoint, overrides)
- if err != nil {
- return nil, fmt.Errorf("error looking up supplemental groups for container %s exec session %s: %w", c.ID(), sessionID, err)
- }
- }
-
- // If user was set, look it up in the container to get a UID to use on
- // the host
- if user != "" || len(sgids) > 0 {
- if user != "" {
- for _, sgid := range execUser.Sgids {
- sgids = append(sgids, uint32(sgid))
- }
- }
- processUser := spec.User{
- UID: uint32(execUser.Uid),
- GID: uint32(execUser.Gid),
- AdditionalGids: sgids,
- }
-
- pspec.User = processUser
- }
-
+func (c *Container) setProcessCapabilitiesExec(options *ExecOptions, user string, execUser *user.ExecUser, pspec *spec.Process) error {
ctrSpec, err := c.specFromState()
if err != nil {
- return nil, err
+ return err
}
allCaps, err := capabilities.BoundingSet()
if err != nil {
- return nil, err
+ return err
}
if options.Privileged {
pspec.Capabilities.Bounding = allCaps
@@ -773,25 +35,5 @@ func prepareProcessExec(c *Container, options *ExecOptions, env []string, sessio
pspec.Capabilities.Permitted = ctrSpec.Process.Capabilities.Effective
pspec.Capabilities.Ambient = ctrSpec.Process.Capabilities.Effective
}
-
- hasHomeSet := false
- for _, s := range pspec.Env {
- if strings.HasPrefix(s, "HOME=") {
- hasHomeSet = true
- break
- }
- }
- if !hasHomeSet {
- pspec.Env = append(pspec.Env, fmt.Sprintf("HOME=%s", execUser.Home))
- }
-
- processJSON, err := json.Marshal(pspec)
- if err != nil {
- return nil, err
- }
-
- if err := ioutil.WriteFile(f.Name(), processJSON, 0644); err != nil {
- return nil, err
- }
- return f, nil
+ return nil
}
diff --git a/libpod/oci_conmon_freebsd.go b/libpod/oci_conmon_freebsd.go
new file mode 100644
index 000000000..d74f2af01
--- /dev/null
+++ b/libpod/oci_conmon_freebsd.go
@@ -0,0 +1,27 @@
+package libpod
+
+import (
+ "errors"
+ "os"
+ "os/exec"
+)
+
+func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ return -1, errors.New("unsupported (*ConmonOCIRuntime) createRootlessContainer")
+}
+
+// Run the closure with the container's socket label set
+func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func() error) error {
+ // No label support yet
+ return closure()
+}
+
+// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
+// it then signals for conmon to start by sending nonce data down the start fd
+func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error {
+ // There is no cgroup equivalent on FreeBSD; just signal conmon to start
+ if err := writeConmonPipeData(startFd); err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index cb76de72c..0964d4ea3 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -1,46 +1,21 @@
-//go:build linux
-// +build linux
-
package libpod
import (
- "bufio"
- "bytes"
- "context"
- "errors"
"fmt"
- "io"
- "io/ioutil"
- "net"
- "net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
- "strconv"
"strings"
- "sync"
- "syscall"
- "text/template"
- "time"
runcconfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/config"
- "github.com/containers/common/pkg/resize"
- cutil "github.com/containers/common/pkg/util"
- conmonConfig "github.com/containers/conmon/runner/config"
- "github.com/containers/podman/v4/libpod/define"
- "github.com/containers/podman/v4/libpod/logs"
- "github.com/containers/podman/v4/pkg/checkpoint/crutils"
"github.com/containers/podman/v4/pkg/errorhandling"
"github.com/containers/podman/v4/pkg/rootless"
- "github.com/containers/podman/v4/pkg/specgenutil"
- "github.com/containers/podman/v4/pkg/util"
"github.com/containers/podman/v4/utils"
- "github.com/containers/storage/pkg/homedir"
pmount "github.com/containers/storage/pkg/mount"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
@@ -48,782 +23,70 @@ import (
"golang.org/x/sys/unix"
)
-const (
- // This is Conmon's STDIO_BUF_SIZE. I don't believe we have access to it
- // directly from the Go code, so const it here
- // Important: The conmon attach socket uses an extra byte at the beginning of each
- // message to specify the STREAM so we have to increase the buffer size by one
- bufferSize = conmonConfig.BufSize + 1
-)
-
-// ConmonOCIRuntime is an OCI runtime managed by Conmon.
-// TODO: Make all calls to OCI runtime have a timeout.
-type ConmonOCIRuntime struct {
- name string
- path string
- conmonPath string
- conmonEnv []string
- tmpDir string
- exitsDir string
- logSizeMax int64
- noPivot bool
- reservePorts bool
- runtimeFlags []string
- supportsJSON bool
- supportsKVM bool
- supportsNoCgroups bool
- enableKeyring bool
-}
-
-// Make a new Conmon-based OCI runtime with the given options.
-// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or
-// any runtime with a runc-compatible CLI.
-// The first path that points to a valid executable will be used.
-// Deliberately private. Someone should not be able to construct this outside of
-// libpod.
-func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) {
- if name == "" {
- return nil, fmt.Errorf("the OCI runtime must be provided a non-empty name: %w", define.ErrInvalidArg)
- }
-
- // Make lookup tables for runtime support
- supportsJSON := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsJSON))
- supportsNoCgroups := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsNoCgroups))
- supportsKVM := make(map[string]bool, len(runtimeCfg.Engine.RuntimeSupportsKVM))
- for _, r := range runtimeCfg.Engine.RuntimeSupportsJSON {
- supportsJSON[r] = true
- }
- for _, r := range runtimeCfg.Engine.RuntimeSupportsNoCgroups {
- supportsNoCgroups[r] = true
- }
- for _, r := range runtimeCfg.Engine.RuntimeSupportsKVM {
- supportsKVM[r] = true
- }
-
- runtime := new(ConmonOCIRuntime)
- runtime.name = name
- runtime.conmonPath = conmonPath
- runtime.runtimeFlags = runtimeFlags
-
- runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars
- runtime.tmpDir = runtimeCfg.Engine.TmpDir
- runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax
- runtime.noPivot = runtimeCfg.Engine.NoPivotRoot
- runtime.reservePorts = runtimeCfg.Engine.EnablePortReservation
- runtime.enableKeyring = runtimeCfg.Containers.EnableKeyring
-
- // TODO: probe OCI runtime for feature and enable automatically if
- // available.
-
- base := filepath.Base(name)
- runtime.supportsJSON = supportsJSON[base]
- runtime.supportsNoCgroups = supportsNoCgroups[base]
- runtime.supportsKVM = supportsKVM[base]
-
- foundPath := false
- for _, path := range paths {
- stat, err := os.Stat(path)
- if err != nil {
- if os.IsNotExist(err) {
- continue
- }
- return nil, fmt.Errorf("cannot stat OCI runtime %s path: %w", name, err)
- }
- if !stat.Mode().IsRegular() {
- continue
- }
- foundPath = true
- logrus.Tracef("found runtime %q", path)
- runtime.path = path
- break
- }
-
- // Search the $PATH as last fallback
- if !foundPath {
- if foundRuntime, err := exec.LookPath(name); err == nil {
- foundPath = true
- runtime.path = foundRuntime
- logrus.Debugf("using runtime %q from $PATH: %q", name, foundRuntime)
- }
- }
-
- if !foundPath {
- return nil, fmt.Errorf("no valid executable found for OCI runtime %s: %w", name, define.ErrInvalidArg)
- }
-
- runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
-
- // Create the exit files and attach sockets directories
- if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
- // The directory is allowed to exist
- if !os.IsExist(err) {
- return nil, fmt.Errorf("error creating OCI runtime exit files directory: %w", err)
- }
+func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
+ type result struct {
+ restoreDuration int64
+ err error
}
- return runtime, nil
-}
-
-// Name returns the name of the runtime being wrapped by Conmon.
-func (r *ConmonOCIRuntime) Name() string {
- return r.name
-}
-
-// Path returns the path of the OCI runtime being wrapped by Conmon.
-func (r *ConmonOCIRuntime) Path() string {
- return r.path
-}
-
-// hasCurrentUserMapped checks whether the current user is mapped inside the container user namespace
-func hasCurrentUserMapped(ctr *Container) bool {
- if len(ctr.config.IDMappings.UIDMap) == 0 && len(ctr.config.IDMappings.GIDMap) == 0 {
- return true
- }
- uid := os.Geteuid()
- for _, m := range ctr.config.IDMappings.UIDMap {
- if uid >= m.HostID && uid < m.HostID+m.Size {
- return true
- }
- }
- return false
-}
-
-// CreateContainer creates a container.
-func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
- // always make the run dir accessible to the current user so that the PID files can be read without
- // being in the rootless user namespace.
- if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil {
- return 0, err
- }
- if !hasCurrentUserMapped(ctr) {
- for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} {
- if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
+ ch := make(chan result)
+ go func() {
+ runtime.LockOSThread()
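+ // Lock this goroutine to its OS thread: Unshare(CLONE_NEWNS) below
+ // changes the mount namespace of the calling thread only, and the
+ // deferred Setns must run on that same thread.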
+ restoreDuration, err := func() (int64, error) {
+ fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
+ if err != nil {
return 0, err
}
- }
+ defer errorhandling.CloseQuiet(fd)
- // if we are running a non privileged container, be sure to umount some kernel paths so they are not
- // bind mounted inside the container at all.
- if !ctr.config.Privileged && !rootless.IsRootless() {
- type result struct {
- restoreDuration int64
- err error
+ // create a new mountns on the current thread
+ if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
+ return 0, err
}
- ch := make(chan result)
- go func() {
- runtime.LockOSThread()
- restoreDuration, err := func() (int64, error) {
- fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
- if err != nil {
- return 0, err
- }
- defer errorhandling.CloseQuiet(fd)
-
- // create a new mountns on the current thread
- if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
- return 0, err
- }
- defer func() {
- if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
- logrus.Errorf("Unable to clone new namespace: %q", err)
- }
- }()
-
- // don't spread our mounts around. We are setting only /sys to be slave
- // so that the cleanup process is still able to umount the storage and the
- // changes are propagated to the host.
- err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
- if err != nil {
- return 0, fmt.Errorf("cannot make /sys slave: %w", err)
- }
-
- mounts, err := pmount.GetMounts()
- if err != nil {
- return 0, err
- }
- for _, m := range mounts {
- if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
- continue
- }
- err = unix.Unmount(m.Mountpoint, 0)
- if err != nil && !os.IsNotExist(err) {
- return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
- }
- }
- return r.createOCIContainer(ctr, restoreOptions)
- }()
- ch <- result{
- restoreDuration: restoreDuration,
- err: err,
+ defer func() {
+ if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
+ logrus.Errorf("Unable to clone new namespace: %q", err)
}
}()
- r := <-ch
- return r.restoreDuration, r.err
- }
- }
- return r.createOCIContainer(ctr, restoreOptions)
-}
-
-// UpdateContainerStatus retrieves the current status of the container from the
-// runtime. It updates the container's state but does not save it.
-// If useRuntime is false, we will not directly hit runc to see the container's
-// status, but will instead only check for the existence of the conmon exit file
-// and update state to stopped if it exists.
-func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
-
- // Store old state so we know if we were already stopped
- oldState := ctr.state.State
-
- state := new(spec.State)
-
- cmd := exec.Command(r.path, "state", ctr.ID())
- cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
-
- outPipe, err := cmd.StdoutPipe()
- if err != nil {
- return fmt.Errorf("getting stdout pipe: %w", err)
- }
- errPipe, err := cmd.StderrPipe()
- if err != nil {
- return fmt.Errorf("getting stderr pipe: %w", err)
- }
-
- if err := cmd.Start(); err != nil {
- out, err2 := ioutil.ReadAll(errPipe)
- if err2 != nil {
- return fmt.Errorf("error getting container %s state: %w", ctr.ID(), err)
- }
- if strings.Contains(string(out), "does not exist") || strings.Contains(string(out), "No such file") {
- if err := ctr.removeConmonFiles(); err != nil {
- logrus.Debugf("unable to remove conmon files for container %s", ctr.ID())
- }
- ctr.state.ExitCode = -1
- ctr.state.FinishedTime = time.Now()
- ctr.state.State = define.ContainerStateExited
- return ctr.runtime.state.AddContainerExitCode(ctr.ID(), ctr.state.ExitCode)
- }
- return fmt.Errorf("error getting container %s state. stderr/out: %s: %w", ctr.ID(), out, err)
- }
- defer func() {
- _ = cmd.Wait()
- }()
-
- if err := errPipe.Close(); err != nil {
- return err
- }
- out, err := ioutil.ReadAll(outPipe)
- if err != nil {
- return fmt.Errorf("error reading stdout: %s: %w", ctr.ID(), err)
- }
- if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil {
- return fmt.Errorf("error decoding container status for container %s: %w", ctr.ID(), err)
- }
- ctr.state.PID = state.Pid
-
- switch state.Status {
- case "created":
- ctr.state.State = define.ContainerStateCreated
- case "paused":
- ctr.state.State = define.ContainerStatePaused
- case "running":
- ctr.state.State = define.ContainerStateRunning
- case "stopped":
- ctr.state.State = define.ContainerStateStopped
- default:
- return fmt.Errorf("unrecognized status returned by runtime for container %s: %s: %w",
- ctr.ID(), state.Status, define.ErrInternal)
- }
-
- // Only grab exit status if we were not already stopped
- // If we were, it should already be in the database
- if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped {
- if _, err := ctr.Wait(context.Background()); err != nil {
- logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err)
- }
- return nil
- }
-
-	// Handle ContainerStateStopping - keep it as long as the container
-	// is still running or paused, i.e. it has not actually stopped yet.
- if oldState == define.ContainerStateStopping && (ctr.state.State == define.ContainerStatePaused || ctr.state.State == define.ContainerStateRunning) {
- ctr.state.State = define.ContainerStateStopping
- }
- return nil
-}
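For reference, the `state` output decoded above is the OCI runtime-spec state document. A minimal decoding sketch with illustrative values (the ID, PID, and bundle path are invented):

    package main

    import (
    	"encoding/json"
    	"fmt"

    	spec "github.com/opencontainers/runtime-spec/specs-go"
    )

    // Illustrative only; a real runtime fills in actual IDs and paths.
    const exampleState = `{
      "ociVersion": "1.0.2",
      "id": "example",
      "status": "running",
      "pid": 4422,
      "bundle": "/run/containers/example"
    }`

    func main() {
    	st := new(spec.State)
    	if err := json.Unmarshal([]byte(exampleState), st); err != nil {
    		panic(err)
    	}
    	fmt.Println(st.Status, st.Pid) // running 4422
    }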
-
-// StartContainer starts the given container.
-// Sets time the container was started, but does not save it.
-func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
- // TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- if path, ok := os.LookupEnv("PATH"); ok {
- env = append(env, fmt.Sprintf("PATH=%s", path))
- }
- if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "start", ctr.ID())...); err != nil {
- return err
- }
-
- ctr.state.StartedTime = time.Now()
-
- return nil
-}
-
-// KillContainer sends the given signal to the given container.
-// If all is set, send to all PIDs in the container.
-// All is only supported if the container created cgroups.
-func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
- logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID())
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- var args []string
- args = append(args, r.runtimeFlags...)
- if all {
- args = append(args, "kill", "--all", ctr.ID(), fmt.Sprintf("%d", signal))
- } else {
- args = append(args, "kill", ctr.ID(), fmt.Sprintf("%d", signal))
- }
- if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
- // Update container state - there's a chance we failed because
- // the container exited in the meantime.
- if err2 := r.UpdateContainerStatus(ctr); err2 != nil {
- logrus.Infof("Error updating status for container %s: %v", ctr.ID(), err2)
- }
- if ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
- return define.ErrCtrStateInvalid
- }
- return fmt.Errorf("error sending signal to container %s: %w", ctr.ID(), err)
- }
-
- return nil
-}
-
-// StopContainer stops a container, first using its given stop signal (or
-// SIGTERM if no signal was specified), then using SIGKILL.
-// Timeout is given in seconds. If timeout is 0, the container will be
-// immediately killed with SIGKILL.
-// Does not set finished time for container, assumes you will run updateStatus
-// after to pull the exit code.
-func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
- logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
-
- // Ping the container to see if it's alive
- // If it's not, it's already stopped, return
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
- }
-
- stopSignal := ctr.config.StopSignal
- if stopSignal == 0 {
- stopSignal = uint(syscall.SIGTERM)
- }
-
- if timeout > 0 {
- if err := r.KillContainer(ctr, stopSignal, all); err != nil {
- // Is the container gone?
- // If so, it probably died between the first check and
- // our sending the signal
- // The container is stopped, so exit cleanly
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
+ // don't spread our mounts around. We are setting only /sys to be slave
+ // so that the cleanup process is still able to umount the storage and the
+ // changes are propagated to the host.
+ err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
+ if err != nil {
+ return 0, fmt.Errorf("cannot make /sys slave: %w", err)
}
- return err
- }
-
- if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
- logrus.Debugf("Timed out stopping container %s with %s, resorting to SIGKILL: %v", ctr.ID(), unix.SignalName(syscall.Signal(stopSignal)), err)
- logrus.Warnf("StopSignal %s failed to stop container %s in %d seconds, resorting to SIGKILL", unix.SignalName(syscall.Signal(stopSignal)), ctr.Name(), timeout)
- } else {
- // No error, the container is dead
- return nil
- }
- }
-
- if err := r.KillContainer(ctr, 9, all); err != nil {
- // Again, check if the container is gone. If it is, exit cleanly.
- err := unix.Kill(ctr.state.PID, 0)
- if err == unix.ESRCH {
- return nil
- }
-
- return fmt.Errorf("error sending SIGKILL to container %s: %w", ctr.ID(), err)
- }
-
- // Give runtime a few seconds to make it happen
- if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
- return err
- }
-
- return nil
-}
-
-// DeleteContainer deletes a container from the OCI runtime.
-func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "delete", "--force", ctr.ID())...)
-}
-
-// PauseContainer pauses the given container.
-func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "pause", ctr.ID())...)
-}
-
-// UnpauseContainer unpauses the given container.
-func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, append(r.runtimeFlags, "resume", ctr.ID())...)
-}
-
-// HTTPAttach performs an attach for the HTTP API.
-// The caller must handle closing the HTTP connection after this returns.
-// The cancel channel is not closed; it is up to the caller to do so after
-// this function returns.
-// If this is a container with a terminal, we will stream raw. If it is not, we
-// will stream with an 8-byte header to multiplex STDOUT and STDERR.
-// Returns any errors that occurred, and whether the connection was successfully
-// hijacked before that error occurred.
-func (r *ConmonOCIRuntime) HTTPAttach(ctr *Container, req *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, hijackDone chan<- bool, streamAttach, streamLogs bool) (deferredErr error) {
- isTerminal := false
- if ctr.config.Spec.Process != nil {
- isTerminal = ctr.config.Spec.Process.Terminal
- }
-
- if streams != nil {
- if !streams.Stdin && !streams.Stdout && !streams.Stderr {
- return fmt.Errorf("must specify at least one stream to attach to: %w", define.ErrInvalidArg)
- }
- }
-
- attachSock, err := r.AttachSocketPath(ctr)
- if err != nil {
- return err
- }
-
- var conn *net.UnixConn
- if streamAttach {
- newConn, err := openUnixSocket(attachSock)
- if err != nil {
- return fmt.Errorf("failed to connect to container's attach socket: %v: %w", attachSock, err)
- }
- conn = newConn
- defer func() {
- if err := conn.Close(); err != nil {
- logrus.Errorf("Unable to close container %s attach socket: %q", ctr.ID(), err)
+ mounts, err := pmount.GetMounts()
+ if err != nil {
+ return 0, err
}
- }()
-
- logrus.Debugf("Successfully connected to container %s attach socket %s", ctr.ID(), attachSock)
- }
-
- detachString := ctr.runtime.config.Engine.DetachKeys
- if detachKeys != nil {
- detachString = *detachKeys
- }
- detach, err := processDetachKeys(detachString)
- if err != nil {
- return err
- }
-
- attachStdout := true
- attachStderr := true
- attachStdin := true
- if streams != nil {
- attachStdout = streams.Stdout
- attachStderr = streams.Stderr
- attachStdin = streams.Stdin
- }
-
- logrus.Debugf("Going to hijack container %s attach connection", ctr.ID())
-
- // Alright, let's hijack.
- hijacker, ok := w.(http.Hijacker)
- if !ok {
- return fmt.Errorf("unable to hijack connection")
- }
-
- httpCon, httpBuf, err := hijacker.Hijack()
- if err != nil {
- return fmt.Errorf("error hijacking connection: %w", err)
- }
-
- hijackDone <- true
-
- writeHijackHeader(req, httpBuf)
-
- // Force a flush after the header is written.
- if err := httpBuf.Flush(); err != nil {
- return fmt.Errorf("error flushing HTTP hijack header: %w", err)
- }
-
- defer func() {
- hijackWriteErrorAndClose(deferredErr, ctr.ID(), isTerminal, httpCon, httpBuf)
- }()
-
- logrus.Debugf("Hijack for container %s attach session done, ready to stream", ctr.ID())
-
- // TODO: This is gross. Really, really gross.
- // I want to say we should read all the logs into an array before
- // calling this, in container_api.go, but that could take a lot of
- // memory...
- // On the whole, we need to figure out a better way of doing this,
- // though.
- logSize := 0
- if streamLogs {
- logrus.Debugf("Will stream logs for container %s attach session", ctr.ID())
-
- // Get all logs for the container
- logChan := make(chan *logs.LogLine)
- logOpts := new(logs.LogOptions)
- logOpts.Tail = -1
- logOpts.WaitGroup = new(sync.WaitGroup)
- errChan := make(chan error)
- go func() {
- var err error
- // In non-terminal mode we need to prepend with the
- // stream header.
- logrus.Debugf("Writing logs for container %s to HTTP attach", ctr.ID())
- for logLine := range logChan {
- if !isTerminal {
- device := logLine.Device
- var header []byte
- headerLen := uint32(len(logLine.Msg))
- logSize += len(logLine.Msg)
- switch strings.ToLower(device) {
- case "stdin":
- header = makeHTTPAttachHeader(0, headerLen)
- case "stdout":
- header = makeHTTPAttachHeader(1, headerLen)
- case "stderr":
- header = makeHTTPAttachHeader(2, headerLen)
- default:
- logrus.Errorf("Unknown device for log line: %s", device)
- header = makeHTTPAttachHeader(1, headerLen)
- }
- _, err = httpBuf.Write(header)
- if err != nil {
- break
- }
- }
- _, err = httpBuf.Write([]byte(logLine.Msg))
- if err != nil {
- break
- }
- if !logLine.Partial() {
- _, err = httpBuf.Write([]byte("\n"))
- if err != nil {
- break
- }
+ for _, m := range mounts {
+ if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
+ continue
}
- err = httpBuf.Flush()
- if err != nil {
- break
+ err = unix.Unmount(m.Mountpoint, 0)
+ if err != nil && !os.IsNotExist(err) {
+ return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
}
}
- errChan <- err
- }()
- if err := ctr.ReadLog(context.Background(), logOpts, logChan, 0); err != nil {
- return err
- }
- go func() {
- logOpts.WaitGroup.Wait()
- close(logChan)
+ return r.createOCIContainer(ctr, restoreOptions)
}()
- logrus.Debugf("Done reading logs for container %s, %d bytes", ctr.ID(), logSize)
- if err := <-errChan; err != nil {
- return err
+ ch <- result{
+ restoreDuration: restoreDuration,
+ err: err,
}
- }
- if !streamAttach {
- logrus.Debugf("Done streaming logs for container %s attach, exiting as attach streaming not requested", ctr.ID())
- return nil
- }
-
- logrus.Debugf("Forwarding attach output for container %s", ctr.ID())
-
- stdoutChan := make(chan error)
- stdinChan := make(chan error)
-
- // Handle STDOUT/STDERR
- go func() {
- var err error
- if isTerminal {
- // Hack: return immediately if attachStdout not set to
- // emulate Docker.
- // Basically, when terminal is set, STDERR goes nowhere.
-			// Everything goes over STDOUT.
- // Therefore, if not attaching STDOUT - we'll never copy
- // anything from here.
- logrus.Debugf("Performing terminal HTTP attach for container %s", ctr.ID())
- if attachStdout {
- err = httpAttachTerminalCopy(conn, httpBuf, ctr.ID())
- }
- } else {
- logrus.Debugf("Performing non-terminal HTTP attach for container %s", ctr.ID())
- err = httpAttachNonTerminalCopy(conn, httpBuf, ctr.ID(), attachStdin, attachStdout, attachStderr)
- }
- stdoutChan <- err
- logrus.Debugf("STDOUT/ERR copy completed")
}()
- // Next, STDIN. Avoid entirely if attachStdin unset.
- if attachStdin {
- go func() {
- _, err := cutil.CopyDetachable(conn, httpBuf, detach)
- logrus.Debugf("STDIN copy completed")
- stdinChan <- err
- }()
- }
-
- for {
- select {
- case err := <-stdoutChan:
- if err != nil {
- return err
- }
-
- return nil
- case err := <-stdinChan:
- if err != nil {
- return err
- }
- // copy stdin is done, close it
- if connErr := conn.CloseWrite(); connErr != nil {
- logrus.Errorf("Unable to close conn: %v", connErr)
- }
- case <-cancel:
- return nil
- }
- }
-}
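A hedged sketch of the 8-byte multiplex header built by makeHTTPAttachHeader above, assuming the Docker-style framing: one stream byte (0 = stdin, 1 = stdout, 2 = stderr), three zero padding bytes, then the payload length as a big-endian uint32:

    package main

    import (
    	"encoding/binary"
    	"fmt"
    )

    // makeHeader mirrors the assumed framing: stream byte, three zero
    // bytes of padding, then the payload length in big-endian order.
    func makeHeader(stream byte, length uint32) []byte {
    	header := make([]byte, 8)
    	header[0] = stream
    	binary.BigEndian.PutUint32(header[4:8], length)
    	return header
    }

    func main() {
    	fmt.Printf("% x\n", makeHeader(1, 13)) // 01 00 00 00 00 00 00 0d
    }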
-
-// isRetryable returns whether the error was caused by an interrupted syscall
-// (EINTR) or by an operation on a non-blocking file descriptor that was not
-// yet ready for completion (EAGAIN).
-func isRetryable(err error) bool {
- var errno syscall.Errno
- if errors.As(err, &errno) {
- return errno == syscall.EINTR || errno == syscall.EAGAIN
- }
- return false
-}
-
-// openControlFile opens the terminal control file.
-func openControlFile(ctr *Container, parentDir string) (*os.File, error) {
- controlPath := filepath.Join(parentDir, "ctl")
- for i := 0; i < 600; i++ {
- controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY|unix.O_NONBLOCK, 0)
- if err == nil {
- return controlFile, nil
- }
- if !isRetryable(err) {
- return nil, fmt.Errorf("could not open ctl file for terminal resize for container %s: %w", ctr.ID(), err)
- }
- time.Sleep(time.Second / 10)
- }
- return nil, fmt.Errorf("timeout waiting for %q", controlPath)
+ res := <-ch
+ return res.restoreDuration, res.err
}
-// AttachResize resizes the terminal used by the given container.
-func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize resize.TerminalSize) error {
- controlFile, err := openControlFile(ctr, ctr.bundlePath())
- if err != nil {
- return err
- }
- defer controlFile.Close()
-
- logrus.Debugf("Received a resize event for container %s: %+v", ctr.ID(), newSize)
- if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, newSize.Height, newSize.Width); err != nil {
- return fmt.Errorf("failed to write to ctl file to resize terminal: %w", err)
- }
-
- return nil
-}
-
-// CheckpointContainer checkpoints the given container.
-func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
- // imagePath is used by CRIU to store the actual checkpoint files
- imagePath := ctr.CheckpointPath()
- if options.PreCheckPoint {
- imagePath = ctr.PreCheckPointPath()
- }
- // workPath will be used to store dump.log and stats-dump
- workPath := ctr.bundlePath()
- logrus.Debugf("Writing checkpoint to %s", imagePath)
- logrus.Debugf("Writing checkpoint logs to %s", workPath)
- logrus.Debugf("Pre-dump the container %t", options.PreCheckPoint)
- args := []string{}
- args = append(args, r.runtimeFlags...)
- args = append(args, "checkpoint")
- args = append(args, "--image-path")
- args = append(args, imagePath)
- args = append(args, "--work-path")
- args = append(args, workPath)
- if options.KeepRunning {
- args = append(args, "--leave-running")
- }
- if options.TCPEstablished {
- args = append(args, "--tcp-established")
- }
- if options.FileLocks {
- args = append(args, "--file-locks")
- }
- if options.PreCheckPoint {
- args = append(args, "--pre-dump")
- }
- if !options.PreCheckPoint && options.WithPrevious {
- args = append(
- args,
- "--parent-path",
- filepath.Join("..", preCheckpointDir),
- )
- }
-
- args = append(args, ctr.ID())
- logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " "))
-
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return 0, err
- }
- env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- if path, ok := os.LookupEnv("PATH"); ok {
- env = append(env, fmt.Sprintf("PATH=%s", path))
- }
-
+// Run the closure with the container's socket label set
+func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func() error) error {
runtime.LockOSThread()
if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil {
- return 0, err
+ return err
}
-
- runtimeCheckpointStarted := time.Now()
- err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...)
+ err := closure()
// Ignore error returned from SetSocketLabel("") call,
// can't recover.
if labelErr := label.SetSocketLabel(""); labelErr == nil {
@@ -834,577 +97,7 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container
} else {
logrus.Errorf("Unable to reset socket label: %q", labelErr)
}
-
- runtimeCheckpointDuration := func() int64 {
- if options.PrintStats {
- return time.Since(runtimeCheckpointStarted).Microseconds()
- }
- return 0
- }()
-
- return runtimeCheckpointDuration, err
-}
-
-func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
- if ctr.state.ConmonPID == 0 {
- // If the container is running or paused, assume Conmon is
- // running. We didn't record Conmon PID on some old versions, so
- // that is likely what's going on...
- // Unusual enough that we should print a warning message though.
- if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
- logrus.Warnf("Conmon PID is not set, but container is running!")
- return true, nil
- }
- // Container's not running, so conmon PID being unset is
- // expected. Conmon is not running.
- return false, nil
- }
-
- // We have a conmon PID. Ping it with signal 0.
- if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil {
- if err == unix.ESRCH {
- return false, nil
- }
- return false, fmt.Errorf("error pinging container %s conmon with signal 0: %w", ctr.ID(), err)
- }
- return true, nil
-}
-
-// SupportsCheckpoint checks if the OCI runtime supports checkpointing
-// containers.
-func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
- return crutils.CRRuntimeSupportsCheckpointRestore(r.path)
-}
-
-// SupportsJSONErrors checks if the OCI runtime supports JSON-formatted error
-// messages.
-func (r *ConmonOCIRuntime) SupportsJSONErrors() bool {
- return r.supportsJSON
-}
-
-// SupportsNoCgroups checks if the OCI runtime supports running containers
-// without cgroups (the --cgroup-manager=disabled flag).
-func (r *ConmonOCIRuntime) SupportsNoCgroups() bool {
- return r.supportsNoCgroups
-}
-
-// SupportsKVM checks if the OCI runtime supports running containers
-// without KVM separation
-func (r *ConmonOCIRuntime) SupportsKVM() bool {
- return r.supportsKVM
-}
-
-// AttachSocketPath is the path to a single container's attach socket.
-func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) {
- if ctr == nil {
- return "", fmt.Errorf("must provide a valid container to get attach socket path: %w", define.ErrInvalidArg)
- }
-
- return filepath.Join(ctr.bundlePath(), "attach"), nil
-}
-
-// ExitFilePath is the path to a container's exit file.
-func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
- if ctr == nil {
- return "", fmt.Errorf("must provide a valid container to get exit file path: %w", define.ErrInvalidArg)
- }
- return filepath.Join(r.exitsDir, ctr.ID()), nil
-}
-
-// RuntimeInfo provides information on the runtime.
-func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
- runtimePackage := packageVersion(r.path)
- conmonPackage := packageVersion(r.conmonPath)
- runtimeVersion, err := r.getOCIRuntimeVersion()
- if err != nil {
- return nil, nil, fmt.Errorf("error getting version of OCI runtime %s: %w", r.name, err)
- }
- conmonVersion, err := r.getConmonVersion()
- if err != nil {
- return nil, nil, fmt.Errorf("error getting conmon version: %w", err)
- }
-
- conmon := define.ConmonInfo{
- Package: conmonPackage,
- Path: r.conmonPath,
- Version: conmonVersion,
- }
- ocirt := define.OCIRuntimeInfo{
- Name: r.name,
- Path: r.path,
- Package: runtimePackage,
- Version: runtimeVersion,
- }
- return &conmon, &ocirt, nil
-}
-
-// makeAccessible ensures the path and each of its parent directories have the
-// execute bit (--x--x--x) set, unless they are already owned by the given uid and gid
-func makeAccessible(path string, uid, gid int) error {
- for ; path != "/"; path = filepath.Dir(path) {
- st, err := os.Stat(path)
- if err != nil {
- if os.IsNotExist(err) {
- return nil
- }
- return err
- }
- if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
- continue
- }
- if st.Mode()&0111 != 0111 {
- if err := os.Chmod(path, st.Mode()|0111); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-// Wait for a container which has been sent a signal to stop
-func waitContainerStop(ctr *Container, timeout time.Duration) error {
- return waitPidStop(ctr.state.PID, timeout)
-}
-
-// Wait for a given PID to stop
-func waitPidStop(pid int, timeout time.Duration) error {
- done := make(chan struct{})
- chControl := make(chan struct{})
- go func() {
- for {
- select {
- case <-chControl:
- return
- default:
- if err := unix.Kill(pid, 0); err != nil {
- if err == unix.ESRCH {
- close(done)
- return
- }
- logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
- }
- time.Sleep(100 * time.Millisecond)
- }
- }
- }()
- select {
- case <-done:
- return nil
- case <-time.After(timeout):
- close(chControl)
-		return fmt.Errorf("given PID did not die within timeout")
- }
-}
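The polling above relies on the signal-0 idiom: kill(pid, 0) performs only the kernel's existence and permission checks without delivering anything, so ESRCH means the process is gone. A minimal sketch (note EPERM means the process exists but belongs to another user):

    package main

    import (
    	"fmt"

    	"golang.org/x/sys/unix"
    )

    // pidAlive reports whether a process with the given PID exists.
    // Signal 0 delivers nothing; it only runs the kernel-side checks.
    func pidAlive(pid int) bool {
    	err := unix.Kill(pid, 0)
    	return err == nil || err == unix.EPERM
    }

    func main() {
    	fmt.Println(pidAlive(1)) // true: PID 1 always exists
    }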
-
-func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) {
- logTag := ctr.LogTag()
- if logTag == "" {
- return "", nil
- }
- data, err := ctr.inspectLocked(false)
- if err != nil {
- // FIXME: this error should probably be returned
- return "", nil //nolint: nilerr
- }
- tmpl, err := template.New("container").Parse(logTag)
- if err != nil {
- return "", fmt.Errorf("template parsing error %s: %w", logTag, err)
- }
- var b bytes.Buffer
- err = tmpl.Execute(&b, data)
- if err != nil {
- return "", err
- }
- return b.String(), nil
-}
-
-// createOCIContainer generates this container's main conmon instance and prepares it for starting
-func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
- var stderrBuf bytes.Buffer
-
- runtimeDir, err := util.GetRuntimeDir()
- if err != nil {
- return 0, err
- }
-
- parentSyncPipe, childSyncPipe, err := newPipe()
- if err != nil {
- return 0, fmt.Errorf("error creating socket pair: %w", err)
- }
- defer errorhandling.CloseQuiet(parentSyncPipe)
-
- childStartPipe, parentStartPipe, err := newPipe()
- if err != nil {
- return 0, fmt.Errorf("error creating socket pair for start pipe: %w", err)
- }
-
- defer errorhandling.CloseQuiet(parentStartPipe)
-
- var ociLog string
- if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
- ociLog = filepath.Join(ctr.state.RunDir, "oci-log")
- }
-
- logTag, err := r.getLogTag(ctr)
- if err != nil {
- return 0, err
- }
-
- if ctr.config.CgroupsMode == cgroupSplit {
- if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil {
- return 0, err
- }
- }
-
- pidfile := ctr.config.PidFile
- if pidfile == "" {
- pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
- }
-
- args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
-
- if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.notifySocket != "" {
- args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.notifySocket))
- }
-
- if ctr.config.Spec.Process.Terminal {
- args = append(args, "-t")
- } else if ctr.config.Stdin {
- args = append(args, "-i")
- }
-
- if ctr.config.Timeout > 0 {
- args = append(args, fmt.Sprintf("--timeout=%d", ctr.config.Timeout))
- }
-
- if !r.enableKeyring {
- args = append(args, "--no-new-keyring")
- }
- if ctr.config.ConmonPidFile != "" {
- args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile)
- }
-
- if r.noPivot {
- args = append(args, "--no-pivot")
- }
-
- exitCommand, err := specgenutil.CreateExitCommandArgs(ctr.runtime.storageConfig, ctr.runtime.config, logrus.IsLevelEnabled(logrus.DebugLevel), ctr.AutoRemove(), false)
- if err != nil {
- return 0, err
- }
- exitCommand = append(exitCommand, ctr.config.ID)
-
- args = append(args, "--exit-command", exitCommand[0])
- for _, arg := range exitCommand[1:] {
- args = append(args, []string{"--exit-command-arg", arg}...)
- }
-
- // Pass down the LISTEN_* environment (see #10443).
- preserveFDs := ctr.config.PreserveFDs
- if val := os.Getenv("LISTEN_FDS"); val != "" {
- if ctr.config.PreserveFDs > 0 {
- logrus.Warnf("Ignoring LISTEN_FDS to preserve custom user-specified FDs")
- } else {
- fds, err := strconv.Atoi(val)
- if err != nil {
- return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err)
- }
- preserveFDs = uint(fds)
- }
- }
-
- if preserveFDs > 0 {
- args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", preserveFDs))...)
- }
-
- if restoreOptions != nil {
- args = append(args, "--restore", ctr.CheckpointPath())
- if restoreOptions.TCPEstablished {
- args = append(args, "--runtime-opt", "--tcp-established")
- }
- if restoreOptions.FileLocks {
- args = append(args, "--runtime-opt", "--file-locks")
- }
- if restoreOptions.Pod != "" {
- mountLabel := ctr.config.MountLabel
- processLabel := ctr.config.ProcessLabel
- if mountLabel != "" {
- args = append(
- args,
- "--runtime-opt",
- fmt.Sprintf(
- "--lsm-mount-context=%s",
- mountLabel,
- ),
- )
- }
- if processLabel != "" {
- args = append(
- args,
- "--runtime-opt",
- fmt.Sprintf(
- "--lsm-profile=selinux:%s",
- processLabel,
- ),
- )
- }
- }
- }
-
- logrus.WithFields(logrus.Fields{
- "args": args,
- }).Debugf("running conmon: %s", r.conmonPath)
-
- cmd := exec.Command(r.conmonPath, args...)
- cmd.SysProcAttr = &syscall.SysProcAttr{
- Setpgid: true,
- }
- // TODO this is probably a really bad idea for some uses
- // Make this configurable
- cmd.Stdin = os.Stdin
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- if ctr.config.Spec.Process.Terminal {
- cmd.Stderr = &stderrBuf
- }
-
- // 0, 1 and 2 are stdin, stdout and stderr
- conmonEnv := r.configureConmonEnv(runtimeDir)
-
- var filesToClose []*os.File
- if preserveFDs > 0 {
- for fd := 3; fd < int(3+preserveFDs); fd++ {
- f := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))
- filesToClose = append(filesToClose, f)
- cmd.ExtraFiles = append(cmd.ExtraFiles, f)
- }
- }
-
- cmd.Env = r.conmonEnv
- // we don't want to step on users fds they asked to preserve
- // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
- cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4))
- cmd.Env = append(cmd.Env, conmonEnv...)
- cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe)
-
- if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() {
- ports, err := bindPorts(ctr.convertPortMappings())
- if err != nil {
- return 0, err
- }
- filesToClose = append(filesToClose, ports...)
-
- // Leak the port we bound in the conmon process. These fd's won't be used
- // by the container and conmon will keep the ports busy so that another
- // process cannot use them.
- cmd.ExtraFiles = append(cmd.ExtraFiles, ports...)
- }
-
- if ctr.config.NetMode.IsSlirp4netns() || rootless.IsRootless() {
- if ctr.config.PostConfigureNetNS {
- havePortMapping := len(ctr.config.PortMappings) > 0
- if havePortMapping {
- ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
- if err != nil {
- return 0, fmt.Errorf("failed to create rootless port sync pipe: %w", err)
- }
- }
- ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
- if err != nil {
- return 0, fmt.Errorf("failed to create rootless network sync pipe: %w", err)
- }
- } else {
- if ctr.rootlessSlirpSyncR != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
- }
- if ctr.rootlessSlirpSyncW != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
- }
- }
- // Leak one end in conmon, the other one will be leaked into slirp4netns
- cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
-
- if ctr.rootlessPortSyncW != nil {
- defer errorhandling.CloseQuiet(ctr.rootlessPortSyncW)
- // Leak one end in conmon, the other one will be leaked into rootlessport
- cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW)
- }
- }
- var runtimeRestoreStarted time.Time
- if restoreOptions != nil {
- runtimeRestoreStarted = time.Now()
- }
- err = startCommand(cmd, ctr)
-
- // regardless of whether we errored or not, we no longer need the children pipes
- childSyncPipe.Close()
- childStartPipe.Close()
- if err != nil {
- return 0, err
- }
- if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil {
- return 0, err
- }
- /* Wait for initial setup and fork, and reap child */
- err = cmd.Wait()
- if err != nil {
- return 0, err
- }
-
- pid, err := readConmonPipeData(r.name, parentSyncPipe, ociLog)
- if err != nil {
- if err2 := r.DeleteContainer(ctr); err2 != nil {
- logrus.Errorf("Removing container %s from runtime after creation failed", ctr.ID())
- }
- return 0, err
- }
- ctr.state.PID = pid
-
- conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile)
- if err != nil {
- logrus.Warnf("Error reading conmon pid file for container %s: %v", ctr.ID(), err)
- } else if conmonPID > 0 {
- // conmon not having a pid file is a valid state, so don't set it if we don't have it
- logrus.Infof("Got Conmon PID as %d", conmonPID)
- ctr.state.ConmonPID = conmonPID
- }
-
- runtimeRestoreDuration := func() int64 {
- if restoreOptions != nil && restoreOptions.PrintStats {
- return time.Since(runtimeRestoreStarted).Microseconds()
- }
- return 0
- }()
-
-	// These fds were passed down to the runtime. Close our copies so
-	// that we do not interfere.
- for _, f := range filesToClose {
- errorhandling.CloseQuiet(f)
- }
-
- return runtimeRestoreDuration, nil
-}
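For orientation, the _OCI_SYNCPIPE/_OCI_STARTPIPE values above follow from how exec.Cmd numbers descriptors: ExtraFiles[i] becomes fd 3+i in the child, and the user-preserved fds are appended first. A small sketch of that arithmetic:

    // With N user-preserved fds occupying 3..N+2 in the child, the sync
    // pipe (appended next) lands on fd N+3 and the start pipe on fd N+4.
    func conmonPipeFDs(preserveFDs uint) (syncFD, startFD uint) {
    	return preserveFDs + 3, preserveFDs + 4
    }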
-
-// configureConmonEnv gets the environment values to add to conmon's exec struct
-// TODO this may want to be less hardcoded/more configurable in the future
-func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) []string {
- var env []string
- for _, e := range os.Environ() {
- if strings.HasPrefix(e, "LC_") {
- env = append(env, e)
- }
- }
- conf, ok := os.LookupEnv("CONTAINERS_CONF")
- if ok {
- env = append(env, fmt.Sprintf("CONTAINERS_CONF=%s", conf))
- }
- env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
- env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED")))
- env = append(env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID")))
- home := homedir.Get()
- if home != "" {
- env = append(env, fmt.Sprintf("HOME=%s", home))
- }
-
- return env
-}
-
-// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
-func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
- // set the conmon API version to be able to use the correct sync struct keys
- args := []string{
- "--api-version", "1",
- "-c", ctr.ID(),
- "-u", cuuid,
- "-r", r.path,
- "-b", bundlePath,
- "-p", pidPath,
- "-n", ctr.Name(),
- "--exit-dir", exitDir,
- "--full-attach",
- }
- if len(r.runtimeFlags) > 0 {
- rFlags := []string{}
- for _, arg := range r.runtimeFlags {
- rFlags = append(rFlags, "--runtime-arg", arg)
- }
- args = append(args, rFlags...)
- }
-
- if ctr.CgroupManager() == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
- args = append(args, "-s")
- }
-
- var logDriverArg string
- switch logDriver {
- case define.JournaldLogging:
- logDriverArg = define.JournaldLogging
- case define.NoLogging:
- logDriverArg = define.NoLogging
- case define.PassthroughLogging:
- logDriverArg = define.PassthroughLogging
- //lint:ignore ST1015 the default case has to be here
- default: //nolint:stylecheck,gocritic
- // No case here should happen except JSONLogging, but keep this here in case the options are extended
- logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver())
- fallthrough
- case "":
- // to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod
- // since the former case is obscure, and the latter case isn't an error, let's silently fallthrough
- fallthrough
- case define.JSONLogging:
- fallthrough
- case define.KubernetesLogging:
- logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
- }
-
- args = append(args, "-l", logDriverArg)
- logLevel := logrus.GetLevel()
- args = append(args, "--log-level", logLevel.String())
-
- if logLevel == logrus.DebugLevel {
- logrus.Debugf("%s messages will be logged to syslog", r.conmonPath)
- args = append(args, "--syslog")
- }
-
- size := r.logSizeMax
- if ctr.config.LogSize > 0 {
- size = ctr.config.LogSize
- }
- if size > 0 {
- args = append(args, "--log-size-max", fmt.Sprintf("%v", size))
- }
-
- if ociLogPath != "" {
- args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath))
- }
- if logTag != "" {
- args = append(args, "--log-tag", logTag)
- }
- if ctr.config.NoCgroups {
- logrus.Debugf("Running with no Cgroups")
- args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
- }
- return args
-}
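Put together, an invocation assembled by sharedConmonArgs plus the log arguments looks roughly like the following (paths and IDs illustrative, flags abridged; all flags shown appear in the code above):

    conmon --api-version 1 -c <ctr-id> -u <ctr-id> -r /usr/bin/crun \
        -b <bundle-dir> -p <run-dir>/pidfile -n <name> \
        --exit-dir <tmp>/exits --full-attach \
        -l k8s-file:<log-path> --log-level info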
-
-func startCommand(cmd *exec.Cmd, ctr *Container) error {
- // Make sure to unset the NOTIFY_SOCKET and reset it afterwards if needed.
- switch ctr.config.SdNotifyMode {
- case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
- if ctr.notifySocket != "" {
- if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
- logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
- }
-
- defer func() {
- if err := os.Setenv("NOTIFY_SOCKET", ctr.notifySocket); err != nil {
- logrus.Errorf("Resetting NOTIFY_SOCKET=%s", ctr.notifySocket)
-					logrus.Errorf("Resetting NOTIFY_SOCKET=%s: %v", ctr.notifySocket, err)
- }()
- }
- }
-
- return cmd.Start()
+ return err
}
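The helper above follows a thread-pinning pattern worth spelling out: the socket label applies to the calling OS thread only, so the goroutine is locked to its thread first, and the thread is handed back to the scheduler only if the label was successfully reset; otherwise it stays pinned and is discarded when the goroutine exits, so the stale label cannot leak to other goroutines. A generic sketch of the same shape, with hypothetical set/reset callbacks standing in for label.SetSocketLabel:

    import "runtime"

    // withThreadState runs closure with per-thread state applied; set and
    // reset are hypothetical stand-ins for the SELinux label calls.
    func withThreadState(set, reset func() error, closure func() error) error {
    	runtime.LockOSThread()
    	if err := set(); err != nil {
    		// Leave the thread locked: it will be thrown away when the
    		// goroutine exits, so half-applied state cannot leak.
    		return err
    	}
    	err := closure()
    	if rerr := reset(); rerr == nil {
    		// State restored; safe to hand the thread back.
    		runtime.UnlockOSThread()
    	}
    	return err
    }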
// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
@@ -1476,271 +169,6 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
return nil
}
-// newPipe creates a unix socket pair for communication.
-// Returns two files - first is parent, second is child.
-func newPipe() (*os.File, *os.File, error) {
- fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
- if err != nil {
- return nil, nil, err
- }
- return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
-}
-
-// readConmonPidFile attempts to read conmon's pid from its pid file
-func readConmonPidFile(pidFile string) (int, error) {
-	// Try reading the conmon PID from its PID file.
- if pidFile != "" {
- contents, err := ioutil.ReadFile(pidFile)
- if err != nil {
- return -1, err
- }
- // Convert it to an int
- conmonPID, err := strconv.Atoi(string(contents))
- if err != nil {
- return -1, err
- }
- return conmonPID, nil
- }
- return 0, nil
-}
-
-// readConmonPipeData attempts to read a syncInfo struct from the pipe
-func readConmonPipeData(runtimeName string, pipe *os.File, ociLog string) (int, error) {
- // syncInfo is used to return data from monitor process to daemon
- type syncInfo struct {
- Data int `json:"data"`
- Message string `json:"message,omitempty"`
- }
-
- // Wait to get container pid from conmon
- type syncStruct struct {
- si *syncInfo
- err error
- }
- ch := make(chan syncStruct)
- go func() {
- var si *syncInfo
- rdr := bufio.NewReader(pipe)
- b, err := rdr.ReadBytes('\n')
-		// Ignore EOF here; an error is returned even when data was read.
-		// If the data is not valid JSON, the unmarshal below will fail.
-		if err != nil && !errors.Is(err, io.EOF) {
-			ch <- syncStruct{err: err}
-			return
-		}
- if err := json.Unmarshal(b, &si); err != nil {
- ch <- syncStruct{err: fmt.Errorf("conmon bytes %q: %w", string(b), err)}
- return
- }
- ch <- syncStruct{si: si}
- }()
-
- data := -1 //nolint: wastedassign
- select {
- case ss := <-ch:
- if ss.err != nil {
- if ociLog != "" {
- ociLogData, err := ioutil.ReadFile(ociLog)
- if err == nil {
- var ociErr ociError
- if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
- return -1, getOCIRuntimeError(runtimeName, ociErr.Msg)
- }
- }
- }
- return -1, fmt.Errorf("container create failed (no logs from conmon): %w", ss.err)
- }
- logrus.Debugf("Received: %d", ss.si.Data)
- if ss.si.Data < 0 {
- if ociLog != "" {
- ociLogData, err := ioutil.ReadFile(ociLog)
- if err == nil {
- var ociErr ociError
- if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
- return ss.si.Data, getOCIRuntimeError(runtimeName, ociErr.Msg)
- }
- }
- }
- // If we failed to parse the JSON errors, then print the output as it is
- if ss.si.Message != "" {
- return ss.si.Data, getOCIRuntimeError(runtimeName, ss.si.Message)
- }
- return ss.si.Data, fmt.Errorf("container create failed: %w", define.ErrInternal)
- }
- data = ss.si.Data
- case <-time.After(define.ContainerCreateTimeout):
- return -1, fmt.Errorf("container creation timeout: %w", define.ErrInternal)
- }
- return data, nil
-}
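For context, the sync pipe carries a single JSON line from conmon: the container PID in `data` on success, or a negative value with details in `message` on failure. A hedged test-side sketch that produces the same wire format:

    package main

    import (
    	"encoding/json"
    	"os"
    )

    // writeSyncLine emits the one-line JSON document readConmonPipeData
    // expects; pid is negative on failure, msg carries the error text.
    func writeSyncLine(pipe *os.File, pid int, msg string) error {
    	payload := struct {
    		Data    int    `json:"data"`
    		Message string `json:"message,omitempty"`
    	}{Data: pid, Message: msg}
    	b, err := json.Marshal(payload)
    	if err != nil {
    		return err
    	}
    	_, err = pipe.Write(append(b, '\n'))
    	return err
    }

    func main() {
    	_ = writeSyncLine(os.Stdout, 4422, "") // {"data":4422}
    }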
-
-// writeConmonPipeData writes nonce data to a pipe
-func writeConmonPipeData(pipe *os.File) error {
- someData := []byte{0}
- _, err := pipe.Write(someData)
- return err
-}
-
-// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon
-func formatRuntimeOpts(opts ...string) []string {
- args := make([]string, 0, len(opts)*2)
- for _, o := range opts {
- args = append(args, "--runtime-opt", o)
- }
- return args
-}
-
-// getConmonVersion returns a string representation of the conmon version.
-func (r *ConmonOCIRuntime) getConmonVersion() (string, error) {
- output, err := utils.ExecCmd(r.conmonPath, "--version")
- if err != nil {
- return "", err
- }
- return strings.TrimSuffix(strings.Replace(output, "\n", ", ", 1), "\n"), nil
-}
-
-// getOCIRuntimeVersion returns a string representation of the OCI runtime's
-// version.
-func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) {
- output, err := utils.ExecCmd(r.path, "--version")
- if err != nil {
- return "", err
- }
- return strings.TrimSuffix(output, "\n"), nil
-}
-
-// Copy data from container to HTTP connection, for terminal attach.
-// Container is the container's attach socket connection, http is a buffer for
-// the HTTP connection. cid is the ID of the container the attach session is
-// running for (used solely for error messages).
-func httpAttachTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string) error {
- buf := make([]byte, bufferSize)
- for {
- numR, err := container.Read(buf)
- logrus.Debugf("Read fd(%d) %d/%d bytes for container %s", int(buf[0]), numR, len(buf), cid)
-
- if numR > 0 {
- switch buf[0] {
- case AttachPipeStdout:
- // Do nothing
- default:
- logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
- continue
- }
-
- numW, err2 := http.Write(buf[1:numR])
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- } else if numW+1 != numR {
- return io.ErrShortWrite
- }
- // We need to force the buffer to write immediately, so
- // there isn't a delay on the terminal side.
- if err2 := http.Flush(); err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- }
- }
- if err != nil {
- if err == io.EOF {
- return nil
- }
- return err
- }
- }
-}
-
-// Copy data from a container to an HTTP connection, for non-terminal attach.
-// Appends a header to multiplex input.
-func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter, cid string, stdin, stdout, stderr bool) error {
- buf := make([]byte, bufferSize)
- for {
- numR, err := container.Read(buf)
- if numR > 0 {
- var headerBuf []byte
-
- // Subtract 1 because we strip the first byte (used for
- // multiplexing by Conmon).
- headerLen := uint32(numR - 1)
- // Practically speaking, we could make this buf[0] - 1,
- // but we need to validate it anyway.
- switch buf[0] {
- case AttachPipeStdin:
- headerBuf = makeHTTPAttachHeader(0, headerLen)
- if !stdin {
- continue
- }
- case AttachPipeStdout:
- if !stdout {
- continue
- }
- headerBuf = makeHTTPAttachHeader(1, headerLen)
- case AttachPipeStderr:
- if !stderr {
- continue
- }
- headerBuf = makeHTTPAttachHeader(2, headerLen)
- default:
- logrus.Errorf("Received unexpected attach type %+d, discarding %d bytes", buf[0], numR)
- continue
- }
-
- numH, err2 := http.Write(headerBuf)
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return err2
- }
- // Hardcoding header length is pretty gross, but
- // fast. Should be safe, as this is a fixed part
- // of the protocol.
- if numH != 8 {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return io.ErrShortWrite
- }
-
- numW, err2 := http.Write(buf[1:numR])
- if err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return err2
- } else if numW+1 != numR {
- if err != nil {
- logrus.Errorf("Reading container %s standard streams: %v", cid, err)
- }
-
- return io.ErrShortWrite
- }
- // We need to force the buffer to write immediately, so
- // there isn't a delay on the terminal side.
- if err2 := http.Flush(); err2 != nil {
- if err != nil {
- logrus.Errorf("Reading container %s STDOUT: %v", cid, err)
- }
- return err2
- }
- }
- if err != nil {
- if err == io.EOF {
- return nil
- }
-
- return err
- }
- }
-}
-
// GetLimits converts spec resource limits to cgroup consumable limits
func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) {
if resource == nil {
diff --git a/libpod/oci_conmon_unsupported.go b/libpod/oci_conmon_unsupported.go
new file mode 100644
index 000000000..cc6d68e89
--- /dev/null
+++ b/libpod/oci_conmon_unsupported.go
@@ -0,0 +1,24 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "errors"
+
+ "github.com/containers/common/pkg/config"
+ "github.com/containers/common/pkg/resize"
+)
+
+// Make a new Conmon-based OCI runtime with the given options.
+// Conmon will wrap the given OCI runtime, which can be `runc`, `crun`, or
+// any runtime with a runc-compatible CLI.
+// The first path that points to a valid executable will be used.
+// Deliberately private. Someone should not be able to construct this outside of
+// libpod.
+func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeFlags []string, runtimeCfg *config.Config) (OCIRuntime, error) {
+ return nil, errors.New("newConmonOCIRuntime not supported on this platform")
+}
+
+func registerResizeFunc(r <-chan resize.TerminalSize, bundlePath string) {
+}
diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go
index 2ab2b4577..bbf2957ff 100644
--- a/libpod/oci_missing.go
+++ b/libpod/oci_missing.go
@@ -8,6 +8,7 @@ import (
"github.com/containers/common/pkg/resize"
"github.com/containers/podman/v4/libpod/define"
+ spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
@@ -80,6 +81,11 @@ func (r *MissingRuntime) StartContainer(ctr *Container) error {
return r.printError()
}
+// UpdateContainer is not available as the runtime is missing
+func (r *MissingRuntime) UpdateContainer(ctr *Container, resources *spec.LinuxResources) error {
+ return r.printError()
+}
+
// KillContainer is not available as the runtime is missing
// TODO: We could attempt to unix.Kill() the PID as recorded in the state if we
// really want to smooth things out? Won't be perfect, but if the container has
diff --git a/libpod/options.go b/libpod/options.go
index b31cb4ab2..71ad3d11e 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -6,14 +6,12 @@ import (
"net"
"os"
"path/filepath"
- "strings"
"syscall"
"github.com/containers/buildah/pkg/parse"
nettypes "github.com/containers/common/libnetwork/types"
"github.com/containers/common/pkg/config"
"github.com/containers/common/pkg/secrets"
- cutil "github.com/containers/common/pkg/util"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/types"
"github.com/containers/podman/v4/libpod/define"
@@ -29,12 +27,6 @@ import (
"github.com/sirupsen/logrus"
)
-// Runtime Creation Options
-var (
- // SdNotifyModeValues describes the only values that SdNotifyMode can be
- SdNotifyModeValues = []string{define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore}
-)
-
// WithStorageConfig uses the given configuration to set up container storage.
// If this is not specified, the system default configuration will be used
// instead.
@@ -613,6 +605,17 @@ func WithSystemd() CtrCreateOption {
}
}
+// WithSdNotifySocket sets the path of the container's sd-notify socket.
+func WithSdNotifySocket(socketPath string) CtrCreateOption {
+ return func(ctr *Container) error {
+ if ctr.valid {
+ return define.ErrCtrFinalized
+ }
+ ctr.config.SdNotifySocket = socketPath
+ return nil
+ }
+}
+
// WithSdNotifyMode sets the sd-notify method
func WithSdNotifyMode(mode string) CtrCreateOption {
return func(ctr *Container) error {
@@ -620,9 +623,8 @@ func WithSdNotifyMode(mode string) CtrCreateOption {
return define.ErrCtrFinalized
}
- // verify values
- if len(mode) > 0 && !cutil.StringInSlice(strings.ToLower(mode), SdNotifyModeValues) {
- return fmt.Errorf("--sdnotify values must be one of %q: %w", strings.Join(SdNotifyModeValues, ", "), define.ErrInvalidArg)
+ if err := define.ValidateSdNotifyMode(mode); err != nil {
+ return err
}
ctr.config.SdNotifyMode = mode
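define.ValidateSdNotifyMode is not shown in this diff; based on the inline check it replaces, a plausible reconstruction is a simple membership test over the known modes (an assumption, not the actual implementation):

    // Hypothetical reconstruction of the validator's shape.
    func validateSdNotifyMode(mode string) error {
    	switch mode {
    	case "", define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore:
    		return nil
    	default:
    		return fmt.Errorf("invalid sdnotify value %q: %w", mode, define.ErrInvalidArg)
    	}
    }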
@@ -1411,9 +1413,10 @@ func WithNamedVolumes(volumes []*ContainerNamedVolume) CtrCreateOption {
}
ctr.config.NamedVolumes = append(ctr.config.NamedVolumes, &ContainerNamedVolume{
- Name: vol.Name,
- Dest: vol.Dest,
- Options: mountOpts,
+ Name: vol.Name,
+ Dest: vol.Dest,
+ Options: mountOpts,
+ IsAnonymous: vol.IsAnonymous,
})
}
@@ -1470,6 +1473,17 @@ func WithHealthCheck(healthCheck *manifest.Schema2HealthConfig) CtrCreateOption
}
}
+// WithHealthCheckOnFailureAction adds an on-failure action to health-check config
+func WithHealthCheckOnFailureAction(action define.HealthCheckOnFailureAction) CtrCreateOption {
+ return func(ctr *Container) error {
+ if ctr.valid {
+ return define.ErrCtrFinalized
+ }
+ ctr.config.HealthCheckOnFailureAction = action
+ return nil
+ }
+}
+
// WithPreserveFDs forwards from the process running Libpod into the container
// the given number of extra FDs (starting after the standard streams) to the created container
func WithPreserveFDs(fd uint) CtrCreateOption {
@@ -1693,14 +1707,22 @@ func withSetAnon() VolumeCreateOption {
}
}
-// WithVolumeDriverTimeout sets the volume creation timeout period
-func WithVolumeDriverTimeout(timeout int) VolumeCreateOption {
+// WithVolumeDriverTimeout sets the volume creation timeout period.
+// Only usable if a non-local volume driver is in use.
+func WithVolumeDriverTimeout(timeout uint) VolumeCreateOption {
return func(volume *Volume) error {
if volume.valid {
return define.ErrVolumeFinalized
}
- volume.config.Timeout = timeout
+ if volume.config.Driver == "" || volume.config.Driver == define.VolumeDriverLocal {
+			return fmt.Errorf("volume driver timeout can only be used with non-local volume drivers: %w", define.ErrInvalidArg)
+ }
+
+ tm := timeout
+
+ volume.config.Timeout = &tm
+
return nil
}
}
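A hedged usage sketch of the changed option (the surrounding libpod API is assumed from context): since the option now rejects local drivers, a driver-setting option must precede it so the check sees the driver when the options are applied in order. The driver name here is invented:

    vol, err := runtime.NewVolume(ctx,
    	libpod.WithVolumeDriver("my-plugin"),
    	libpod.WithVolumeDriverTimeout(10), // seconds
    )
    if err != nil {
    	return err
    }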
diff --git a/libpod/plugin/volume_api.go b/libpod/plugin/volume_api.go
index 0a5eaae53..522895798 100644
--- a/libpod/plugin/volume_api.go
+++ b/libpod/plugin/volume_api.go
@@ -3,6 +3,7 @@ package plugin
import (
"bytes"
"context"
+ "errors"
"fmt"
"io/ioutil"
"net"
@@ -13,8 +14,7 @@ import (
"sync"
"time"
- "errors"
-
+ "github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/docker/go-plugins-helpers/sdk"
"github.com/docker/go-plugins-helpers/volume"
@@ -40,7 +40,6 @@ var (
)
const (
- defaultTimeout = 5 * time.Second
volumePluginType = "VolumeDriver"
)
@@ -77,7 +76,7 @@ func validatePlugin(newPlugin *VolumePlugin) error {
// Hit the Activate endpoint to find out if it is, and if so what kind
req, err := http.NewRequest("POST", "http://plugin"+activatePath, nil)
if err != nil {
- return fmt.Errorf("error making request to volume plugin %s activation endpoint: %w", newPlugin.Name, err)
+ return fmt.Errorf("making request to volume plugin %s activation endpoint: %w", newPlugin.Name, err)
}
req.Header.Set("Host", newPlugin.getURI())
@@ -85,7 +84,7 @@ func validatePlugin(newPlugin *VolumePlugin) error {
resp, err := newPlugin.Client.Do(req)
if err != nil {
- return fmt.Errorf("error sending request to plugin %s activation endpoint: %w", newPlugin.Name, err)
+ return fmt.Errorf("sending request to plugin %s activation endpoint: %w", newPlugin.Name, err)
}
defer resp.Body.Close()
@@ -98,12 +97,12 @@ func validatePlugin(newPlugin *VolumePlugin) error {
// Read and decode the body so we can tell if this is a volume plugin.
respBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
- return fmt.Errorf("error reading activation response body from plugin %s: %w", newPlugin.Name, err)
+ return fmt.Errorf("reading activation response body from plugin %s: %w", newPlugin.Name, err)
}
respStruct := new(activateResponse)
if err := json.Unmarshal(respBytes, respStruct); err != nil {
- return fmt.Errorf("error unmarshalling plugin %s activation response: %w", newPlugin.Name, err)
+ return fmt.Errorf("unmarshalling plugin %s activation response: %w", newPlugin.Name, err)
}
foundVolume := false
@@ -129,7 +128,7 @@ func validatePlugin(newPlugin *VolumePlugin) error {
// GetVolumePlugin gets a single volume plugin, with the given name, at the
// given path.
-func GetVolumePlugin(name string, path string, timeout int) (*VolumePlugin, error) {
+func GetVolumePlugin(name string, path string, timeout *uint, cfg *config.Config) (*VolumePlugin, error) {
pluginsLock.Lock()
defer pluginsLock.Unlock()
@@ -152,13 +151,11 @@ func GetVolumePlugin(name string, path string, timeout int) (*VolumePlugin, erro
// Need an HTTP client to force a Unix connection.
// And since we can reuse it, might as well cache it.
client := new(http.Client)
- client.Timeout = defaultTimeout
- // if the user specified a non-zero timeout, use their value. Else, keep the default.
- if timeout != 0 {
- if time.Duration(timeout)*time.Second < defaultTimeout {
- logrus.Warnf("the default timeout for volume creation is %d seconds, setting a time less than that may break this feature.", defaultTimeout)
- }
- client.Timeout = time.Duration(timeout) * time.Second
+ client.Timeout = 5 * time.Second
+ if timeout != nil {
+ client.Timeout = time.Duration(*timeout) * time.Second
+ } else if cfg != nil {
+ client.Timeout = time.Duration(cfg.Engine.VolumePluginTimeout) * time.Second
}
// This bit borrowed from pkg/bindings/connection.go
client.Transport = &http.Transport{
@@ -199,7 +196,7 @@ func (p *VolumePlugin) verifyReachable() error {
return fmt.Errorf("%s: %w", p.Name, ErrPluginRemoved)
}
- return fmt.Errorf("error accessing plugin %s: %w", p.Name, err)
+ return fmt.Errorf("accessing plugin %s: %w", p.Name, err)
}
return nil
}
@@ -215,13 +212,13 @@ func (p *VolumePlugin) sendRequest(toJSON interface{}, endpoint string) (*http.R
if toJSON != nil {
reqJSON, err = json.Marshal(toJSON)
if err != nil {
- return nil, fmt.Errorf("error marshalling request JSON for volume plugin %s endpoint %s: %w", p.Name, endpoint, err)
+ return nil, fmt.Errorf("marshalling request JSON for volume plugin %s endpoint %s: %w", p.Name, endpoint, err)
}
}
req, err := http.NewRequest("POST", "http://plugin"+endpoint, bytes.NewReader(reqJSON))
if err != nil {
- return nil, fmt.Errorf("error making request to volume plugin %s endpoint %s: %w", p.Name, endpoint, err)
+ return nil, fmt.Errorf("making request to volume plugin %s endpoint %s: %w", p.Name, endpoint, err)
}
req.Header.Set("Host", p.getURI())
@@ -229,7 +226,7 @@ func (p *VolumePlugin) sendRequest(toJSON interface{}, endpoint string) (*http.R
resp, err := p.Client.Do(req)
if err != nil {
- return nil, fmt.Errorf("error sending request to volume plugin %s endpoint %s: %w", p.Name, endpoint, err)
+ return nil, fmt.Errorf("sending request to volume plugin %s endpoint %s: %w", p.Name, endpoint, err)
}
// We are *deliberately not closing* response here. It is the
// responsibility of the caller to do so after reading the response.
@@ -243,9 +240,9 @@ func (p *VolumePlugin) makeErrorResponse(err, endpoint, volName string) error {
err = "empty error from plugin"
}
if volName != "" {
- return fmt.Errorf("error on %s on volume %s in volume plugin %s: %w", endpoint, volName, p.Name, errors.New(err))
+ return fmt.Errorf("on %s on volume %s in volume plugin %s: %w", endpoint, volName, p.Name, errors.New(err))
}
- return fmt.Errorf("error on %s in volume plugin %s: %w", endpoint, p.Name, errors.New(err))
+ return fmt.Errorf("on %s in volume plugin %s: %w", endpoint, p.Name, errors.New(err))
}
// Handle error responses from plugin
@@ -257,12 +254,12 @@ func (p *VolumePlugin) handleErrorResponse(resp *http.Response, endpoint, volNam
if resp.StatusCode != 200 {
errResp, err := ioutil.ReadAll(resp.Body)
if err != nil {
- return fmt.Errorf("error reading response body from volume plugin %s: %w", p.Name, err)
+ return fmt.Errorf("reading response body from volume plugin %s: %w", p.Name, err)
}
errStruct := new(volume.ErrorResponse)
if err := json.Unmarshal(errResp, errStruct); err != nil {
- return fmt.Errorf("error unmarshalling JSON response from volume plugin %s: %w", p.Name, err)
+ return fmt.Errorf("unmarshalling JSON response from volume plugin %s: %w", p.Name, err)
}
return p.makeErrorResponse(errStruct.Err, endpoint, volName)
@@ -312,12 +309,12 @@ func (p *VolumePlugin) ListVolumes() ([]*volume.Volume, error) {
volumeRespBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
- return nil, fmt.Errorf("error reading response body from volume plugin %s: %w", p.Name, err)
+ return nil, fmt.Errorf("reading response body from volume plugin %s: %w", p.Name, err)
}
volumeResp := new(volume.ListResponse)
if err := json.Unmarshal(volumeRespBytes, volumeResp); err != nil {
- return nil, fmt.Errorf("error unmarshalling volume plugin %s list response: %w", p.Name, err)
+ return nil, fmt.Errorf("unmarshalling volume plugin %s list response: %w", p.Name, err)
}
return volumeResp.Volumes, nil
@@ -347,12 +344,12 @@ func (p *VolumePlugin) GetVolume(req *volume.GetRequest) (*volume.Volume, error)
getRespBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
- return nil, fmt.Errorf("error reading response body from volume plugin %s: %w", p.Name, err)
+ return nil, fmt.Errorf("reading response body from volume plugin %s: %w", p.Name, err)
}
getResp := new(volume.GetResponse)
if err := json.Unmarshal(getRespBytes, getResp); err != nil {
- return nil, fmt.Errorf("error unmarshalling volume plugin %s get response: %w", p.Name, err)
+ return nil, fmt.Errorf("unmarshalling volume plugin %s get response: %w", p.Name, err)
}
return getResp.Volume, nil
@@ -403,12 +400,12 @@ func (p *VolumePlugin) GetVolumePath(req *volume.PathRequest) (string, error) {
pathRespBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
- return "", fmt.Errorf("error reading response body from volume plugin %s: %w", p.Name, err)
+ return "", fmt.Errorf("reading response body from volume plugin %s: %w", p.Name, err)
}
pathResp := new(volume.PathResponse)
if err := json.Unmarshal(pathRespBytes, pathResp); err != nil {
- return "", fmt.Errorf("error unmarshalling volume plugin %s path response: %w", p.Name, err)
+ return "", fmt.Errorf("unmarshalling volume plugin %s path response: %w", p.Name, err)
}
return pathResp.Mountpoint, nil
@@ -440,12 +437,12 @@ func (p *VolumePlugin) MountVolume(req *volume.MountRequest) (string, error) {
mountRespBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
- return "", fmt.Errorf("error reading response body from volume plugin %s: %w", p.Name, err)
+ return "", fmt.Errorf("reading response body from volume plugin %s: %w", p.Name, err)
}
mountResp := new(volume.MountResponse)
if err := json.Unmarshal(mountRespBytes, mountResp); err != nil {
- return "", fmt.Errorf("error unmarshalling volume plugin %s path response: %w", p.Name, err)
+ return "", fmt.Errorf("unmarshalling volume plugin %s path response: %w", p.Name, err)
}
return mountResp.Mountpoint, nil
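Every wrapper above follows the same plugin RPC shape: POST a JSON body to the endpoint, read the whole response, unmarshal into the matching response struct, and route non-200 statuses through handleErrorResponse. A minimal standalone sketch of that cycle, assuming a plugin listening on a hypothetical unix socket and using the docker go-plugins-helpers types; this is illustrative, not libpod's own client:

package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"

	"github.com/docker/go-plugins-helpers/volume"
)

func main() {
	// Client that dials the plugin's unix socket; the path is an assumption.
	client := &http.Client{
		Transport: &http.Transport{
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				return (&net.Dialer{}).DialContext(ctx, "unix", "/run/podman/plugins/myplugin.sock")
			},
		},
	}

	body, _ := json.Marshal(volume.MountRequest{Name: "myvol", ID: "ctr1"})
	resp, err := client.Post("http://plugin/VolumeDriver.Mount", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	respBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(fmt.Errorf("reading response body: %w", err))
	}
	mountResp := new(volume.MountResponse)
	if err := json.Unmarshal(respBytes, mountResp); err != nil {
		panic(fmt.Errorf("unmarshalling mount response: %w", err))
	}
	fmt.Println("mountpoint:", mountResp.Mountpoint)
}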
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index 29964ae95..924d43436 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -40,7 +40,7 @@ func (p *Pod) startInitContainers(ctx context.Context) error {
icLock := initCon.lock
icLock.Lock()
var time *uint
- if err := p.runtime.removeContainer(ctx, initCon, false, false, true, time); err != nil {
+ if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, time); err != nil {
icLock.Unlock()
return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err)
}
@@ -92,7 +92,7 @@ func (p *Pod) Start(ctx context.Context) (map[string]error, error) {
// Build a dependency graph of containers in the pod
graph, err := BuildContainerGraph(allCtrs)
if err != nil {
- return nil, fmt.Errorf("error generating dependency graph for pod %s: %w", p.ID(), err)
+ return nil, fmt.Errorf("generating dependency graph for pod %s: %w", p.ID(), err)
}
// If there are no containers without dependencies, we can't start
// Error out
@@ -109,7 +109,7 @@ func (p *Pod) Start(ctx context.Context) (map[string]error, error) {
}
if len(ctrErrors) > 0 {
- return ctrErrors, fmt.Errorf("error starting some containers: %w", define.ErrPodPartialFail)
+ return ctrErrors, fmt.Errorf("starting some containers: %w", define.ErrPodPartialFail)
}
defer p.newPodEvent(events.Start)
return nil, nil
@@ -201,7 +201,7 @@ func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
}
if len(ctrErrors) > 0 {
- return ctrErrors, fmt.Errorf("error stopping some containers: %w", define.ErrPodPartialFail)
+ return ctrErrors, fmt.Errorf("stopping some containers: %w", define.ErrPodPartialFail)
}
if err := p.maybeStopServiceContainer(); err != nil {
@@ -305,7 +305,7 @@ func (p *Pod) Cleanup(ctx context.Context) (map[string]error, error) {
}
if len(ctrErrors) > 0 {
- return ctrErrors, fmt.Errorf("error cleaning up some containers: %w", define.ErrPodPartialFail)
+ return ctrErrors, fmt.Errorf("cleaning up some containers: %w", define.ErrPodPartialFail)
}
if err := p.maybeStopServiceContainer(); err != nil {
@@ -376,7 +376,7 @@ func (p *Pod) Pause(ctx context.Context) (map[string]error, error) {
}
if len(ctrErrors) > 0 {
- return ctrErrors, fmt.Errorf("error pausing some containers: %w", define.ErrPodPartialFail)
+ return ctrErrors, fmt.Errorf("pausing some containers: %w", define.ErrPodPartialFail)
}
return nil, nil
}
@@ -432,7 +432,7 @@ func (p *Pod) Unpause(ctx context.Context) (map[string]error, error) {
}
if len(ctrErrors) > 0 {
- return ctrErrors, fmt.Errorf("error unpausing some containers: %w", define.ErrPodPartialFail)
+ return ctrErrors, fmt.Errorf("unpausing some containers: %w", define.ErrPodPartialFail)
}
return nil, nil
}
@@ -470,7 +470,7 @@ func (p *Pod) Restart(ctx context.Context) (map[string]error, error) {
// Build a dependency graph of containers in the pod
graph, err := BuildContainerGraph(allCtrs)
if err != nil {
- return nil, fmt.Errorf("error generating dependency graph for pod %s: %w", p.ID(), err)
+ return nil, fmt.Errorf("generating dependency graph for pod %s: %w", p.ID(), err)
}
ctrErrors := make(map[string]error)
@@ -488,7 +488,7 @@ func (p *Pod) Restart(ctx context.Context) (map[string]error, error) {
}
if len(ctrErrors) > 0 {
- return ctrErrors, fmt.Errorf("error stopping some containers: %w", define.ErrPodPartialFail)
+ return ctrErrors, fmt.Errorf("stopping some containers: %w", define.ErrPodPartialFail)
}
p.newPodEvent(events.Stop)
p.newPodEvent(events.Start)
@@ -547,7 +547,7 @@ func (p *Pod) Kill(ctx context.Context, signal uint) (map[string]error, error) {
}
if len(ctrErrors) > 0 {
- return ctrErrors, fmt.Errorf("error killing some containers: %w", define.ErrPodPartialFail)
+ return ctrErrors, fmt.Errorf("killing some containers: %w", define.ErrPodPartialFail)
}
if err := p.maybeStopServiceContainer(); err != nil {
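All of these pod operations share one contract: per-container failures are collected into the returned map, and the returned error wraps define.ErrPodPartialFail. A hedged sketch of consuming that contract (imports of libpod, define, errors, and logrus assumed):

func startPod(ctx context.Context, pod *libpod.Pod) error {
	ctrErrs, err := pod.Start(ctx)
	if err != nil {
		if !errors.Is(err, define.ErrPodPartialFail) {
			return err
		}
		// Partial failure: some containers are up, report the rest.
		for id, cerr := range ctrErrs {
			logrus.Errorf("Container %s: %v", id, cerr)
		}
	}
	return nil
}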
diff --git a/libpod/pod_internal.go b/libpod/pod_internal.go
index a86cd6d21..d63bff2c9 100644
--- a/libpod/pod_internal.go
+++ b/libpod/pod_internal.go
@@ -16,7 +16,7 @@ import (
func newPod(runtime *Runtime) *Pod {
pod := new(Pod)
pod.config = new(PodConfig)
- pod.config.ID = stringid.GenerateNonCryptoID()
+ pod.config.ID = stringid.GenerateRandomID()
pod.config.Labels = make(map[string]string)
pod.config.CreatedTime = time.Now()
// pod.config.InfraContainer = new(ContainerConfig)
@@ -38,7 +38,7 @@ func (p *Pod) updatePod() error {
// Save pod state to database
func (p *Pod) save() error {
if err := p.runtime.state.SavePod(p); err != nil {
- return fmt.Errorf("error saving pod %s state: %w", p.ID(), err)
+ return fmt.Errorf("saving pod %s state: %w", p.ID(), err)
}
return nil
@@ -60,7 +60,7 @@ func (p *Pod) refresh() error {
// Retrieve the pod's lock
lock, err := p.runtime.lockManager.AllocateAndRetrieveLock(p.config.LockID)
if err != nil {
- return fmt.Errorf("error retrieving lock %d for pod %s: %w", p.config.LockID, p.ID(), err)
+ return fmt.Errorf("retrieving lock %d for pod %s: %w", p.config.LockID, p.ID(), err)
}
p.lock = lock
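The GenerateNonCryptoID to GenerateRandomID swap here (and again below in runtime_ctr.go and runtime_volume_linux.go) moves ID generation onto containers/storage's crypto/rand-backed helper. A small sketch of the helper pair:

import "github.com/containers/storage/pkg/stringid"

func newID() (full, short string) {
	full = stringid.GenerateRandomID() // 64 hex characters from crypto/rand
	short = stringid.TruncateID(full)  // the familiar 12-character short form
	return full, short
}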
diff --git a/libpod/pod_top_unsupported.go b/libpod/pod_top_unsupported.go
new file mode 100644
index 000000000..92323043a
--- /dev/null
+++ b/libpod/pod_top_unsupported.go
@@ -0,0 +1,20 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+)
+
+// GetPodPidInformation returns process-related data of all processes in
+// the pod. The output data can be controlled via the `descriptors`
+// argument, which expects format descriptors and supports all AIX format
+// descriptors of ps(1) plus some additional ones to, for instance, inspect the
+// set of effective capabilities. Each element in the returned string slice
+// is a tab-separated string.
+//
+// For more details, please refer to github.com/containers/psgo.
+func (p *Pod) GetPodPidInformation(descriptors []string) ([]string, error) {
+ return nil, errors.New("not implemented (*Pod) GetPodPidInformation")
+}
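For reference, an illustrative call on a platform where this is implemented (Linux), using ps(1)-style descriptors; the descriptor names come from github.com/containers/psgo:

func printPodProcesses(pod *libpod.Pod) error {
	out, err := pod.GetPodPidInformation([]string{"pid", "user", "args"})
	if err != nil {
		return err
	}
	for _, line := range out {
		fmt.Println(line) // each line is tab-separated columns
	}
	return nil
}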
diff --git a/libpod/runtime.go b/libpod/runtime.go
index ea4b34954..83c9f53e2 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -2,15 +2,12 @@ package libpod
import (
"bufio"
- "bytes"
"context"
"errors"
"fmt"
+ "math/rand"
"os"
- "os/exec"
"path/filepath"
- "regexp"
- "strconv"
"strings"
"sync"
"syscall"
@@ -44,17 +41,6 @@ import (
"github.com/sirupsen/logrus"
)
-const (
- // conmonMinMajorVersion is the major version required for conmon.
- conmonMinMajorVersion = 2
-
- // conmonMinMinorVersion is the minor version required for conmon.
- conmonMinMinorVersion = 0
-
- // conmonMinPatchVersion is the sub-minor version required for conmon.
- conmonMinPatchVersion = 24
-)
-
// A RuntimeOption is a functional option which alters the Runtime created by
// NewRuntime
type RuntimeOption func(*Runtime) error
@@ -127,6 +113,13 @@ type Runtime struct {
secretsManager *secrets.SecretsManager
}
+func init() {
+ // generateName calls namesgenerator.GetRandomName, which uses the
+ // global RNG from math/rand. Seed it here to make sure we
+ // don't get the same name every time.
+ rand.Seed(time.Now().UnixNano())
+}
+
// SetXdgDirs ensures the XDG_RUNTIME_DIR env and XDG_CONFIG_HOME variables are set.
// containers/image uses XDG_RUNTIME_DIR to locate the auth file, XDG_CONFIG_HOME is
// use for the containers.conf configuration file.
@@ -214,7 +207,7 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti
// Overwrite config with user-given configuration options
for _, opt := range options {
if err := opt(runtime); err != nil {
- return nil, fmt.Errorf("error configuring runtime: %w", err)
+ return nil, fmt.Errorf("configuring runtime: %w", err)
}
}
@@ -230,7 +223,7 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti
}
if err := shutdown.Start(); err != nil {
- return nil, fmt.Errorf("error starting shutdown signal handler: %w", err)
+ return nil, fmt.Errorf("starting shutdown signal handler: %w", err)
}
if err := makeRuntime(runtime); err != nil {
@@ -287,7 +280,7 @@ func getLockManager(runtime *Runtime) (lock.Manager, error) {
// Since we're renumbering, this is not fatal.
// Remove the earlier set of locks and recreate.
if err := os.Remove(filepath.Join("/dev/shm", lockPath)); err != nil {
- return nil, fmt.Errorf("error removing libpod locks file %s: %w", lockPath, err)
+ return nil, fmt.Errorf("removing libpod locks file %s: %w", lockPath, err)
}
manager, err = lock.NewSHMLockManager(lockPath, runtime.config.Engine.NumLocks)
@@ -308,7 +301,7 @@ func getLockManager(runtime *Runtime) (lock.Manager, error) {
// Sets up containers/storage, state store, OCI runtime
func makeRuntime(runtime *Runtime) (retErr error) {
// Find a working conmon binary
- cPath, err := findConmon(runtime.config.Engine.ConmonPath)
+ cPath, err := runtime.config.FindConmon()
if err != nil {
return err
}
@@ -325,7 +318,7 @@ func makeRuntime(runtime *Runtime) (retErr error) {
if err := os.MkdirAll(runtime.config.Engine.StaticDir, 0700); err != nil {
// The directory is allowed to exist
if !errors.Is(err, os.ErrExist) {
- return fmt.Errorf("error creating runtime static files directory: %w", err)
+ return fmt.Errorf("creating runtime static files directory: %w", err)
}
}
@@ -369,7 +362,7 @@ func makeRuntime(runtime *Runtime) (retErr error) {
}
}
- return fmt.Errorf("error retrieving runtime configuration from database: %w", err)
+ return fmt.Errorf("retrieving runtime configuration from database: %w", err)
}
runtime.mergeDBConfig(dbConfig)
@@ -412,7 +405,7 @@ func makeRuntime(runtime *Runtime) (retErr error) {
}
if err := runtime.state.SetNamespace(runtime.config.Engine.Namespace); err != nil {
- return fmt.Errorf("error setting libpod namespace in state: %w", err)
+ return fmt.Errorf("setting libpod namespace in state: %w", err)
}
logrus.Debugf("Set libpod namespace to %q", runtime.config.Engine.Namespace)
@@ -469,15 +462,7 @@ func makeRuntime(runtime *Runtime) (retErr error) {
if err := os.MkdirAll(runtime.config.Engine.TmpDir, 0751); err != nil {
// The directory is allowed to exist
if !errors.Is(err, os.ErrExist) {
- return fmt.Errorf("error creating tmpdir: %w", err)
- }
- }
-
- // Create events log dir
- if err := os.MkdirAll(filepath.Dir(runtime.config.Engine.EventsLogFilePath), 0700); err != nil {
- // The directory is allowed to exist
- if !errors.Is(err, os.ErrExist) {
- return fmt.Errorf("error creating events dirs: %w", err)
+ return fmt.Errorf("creating tmpdir: %w", err)
}
}
@@ -535,7 +520,7 @@ func makeRuntime(runtime *Runtime) (retErr error) {
if err := os.MkdirAll(runtime.config.Engine.TmpDir, 0755); err != nil {
// The directory is allowed to exist
if !errors.Is(err, os.ErrExist) {
- return fmt.Errorf("error creating runtime temporary files directory: %w", err)
+ return fmt.Errorf("creating runtime temporary files directory: %w", err)
}
}
@@ -556,7 +541,7 @@ func makeRuntime(runtime *Runtime) (retErr error) {
runtimeAliveFile := filepath.Join(runtime.config.Engine.TmpDir, "alive")
aliveLock, err := storage.GetLockfile(runtimeAliveLock)
if err != nil {
- return fmt.Errorf("error acquiring runtime init lock: %w", err)
+ return fmt.Errorf("acquiring runtime init lock: %w", err)
}
// Acquire the lock and hold it until we return
// This ensures that no two processes will be in runtime.refresh at once
@@ -610,7 +595,7 @@ func makeRuntime(runtime *Runtime) (retErr error) {
if errors.Is(err, os.ErrNotExist) {
doRefresh = true
} else {
- return fmt.Errorf("error reading runtime status file %s: %w", runtimeAliveFile, err)
+ return fmt.Errorf("reading runtime status file %s: %w", runtimeAliveFile, err)
}
}
@@ -670,102 +655,6 @@ func makeRuntime(runtime *Runtime) (retErr error) {
return nil
}
-// findConmon iterates over conmonPaths and returns the path
-// to the first conmon binary with a new enough version. If none is found,
-// we try to do a path lookup of "conmon".
-func findConmon(conmonPaths []string) (string, error) {
- foundOutdatedConmon := false
- for _, path := range conmonPaths {
- stat, err := os.Stat(path)
- if err != nil {
- continue
- }
- if stat.IsDir() {
- continue
- }
- if err := probeConmon(path); err != nil {
- logrus.Warnf("Conmon at %s invalid: %v", path, err)
- foundOutdatedConmon = true
- continue
- }
- logrus.Debugf("Using conmon: %q", path)
- return path, nil
- }
-
- // Search the $PATH as last fallback
- if path, err := exec.LookPath("conmon"); err == nil {
- if err := probeConmon(path); err != nil {
- logrus.Warnf("Conmon at %s is invalid: %v", path, err)
- foundOutdatedConmon = true
- } else {
- logrus.Debugf("Using conmon from $PATH: %q", path)
- return path, nil
- }
- }
-
- if foundOutdatedConmon {
- return "", fmt.Errorf(
- "please update to v%d.%d.%d or later: %w",
- conmonMinMajorVersion, conmonMinMinorVersion, conmonMinPatchVersion, define.ErrConmonOutdated)
- }
-
- return "", fmt.Errorf(
- "could not find a working conmon binary (configured options: %v): %w",
- conmonPaths, define.ErrInvalidArg)
-}
-
-// probeConmon calls conmon --version and verifies it is a new enough version for
-// the runtime expectations the container engine currently has.
-func probeConmon(conmonBinary string) error {
- cmd := exec.Command(conmonBinary, "--version")
- var out bytes.Buffer
- cmd.Stdout = &out
- err := cmd.Run()
- if err != nil {
- return err
- }
- r := regexp.MustCompile(`^conmon version (?P<Major>\d+).(?P<Minor>\d+).(?P<Patch>\d+)`)
-
- matches := r.FindStringSubmatch(out.String())
- if len(matches) != 4 {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- major, err := strconv.Atoi(matches[1])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if major < conmonMinMajorVersion {
- return define.ErrConmonOutdated
- }
- if major > conmonMinMajorVersion {
- return nil
- }
-
- minor, err := strconv.Atoi(matches[2])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if minor < conmonMinMinorVersion {
- return define.ErrConmonOutdated
- }
- if minor > conmonMinMinorVersion {
- return nil
- }
-
- patch, err := strconv.Atoi(matches[3])
- if err != nil {
- return fmt.Errorf("%v: %w", define.ErrConmonVersionFormat, err)
- }
- if patch < conmonMinPatchVersion {
- return define.ErrConmonOutdated
- }
- if patch > conmonMinPatchVersion {
- return nil
- }
-
- return nil
-}
-
// TmpDir gets the current Libpod temporary files directory.
func (r *Runtime) TmpDir() (string, error) {
if !r.valid {
@@ -798,7 +687,7 @@ func (r *Runtime) GetConfig() (*config.Config, error) {
// Copy so the caller won't be able to modify the actual config
if err := JSONDeepCopy(rtConfig, config); err != nil {
- return nil, fmt.Errorf("error copying config: %w", err)
+ return nil, fmt.Errorf("copying config: %w", err)
}
return config, nil
@@ -909,7 +798,7 @@ func (r *Runtime) Shutdown(force bool) error {
// Note that the libimage runtime shuts down the store.
if err := r.libimageRuntime.Shutdown(force); err != nil {
- lastError = fmt.Errorf("error shutting down container storage: %w", err)
+ lastError = fmt.Errorf("shutting down container storage: %w", err)
}
}
if err := r.state.Close(); err != nil {
@@ -941,15 +830,15 @@ func (r *Runtime) refresh(alivePath string) error {
// Containers, pods, and volumes must also reacquire their locks.
ctrs, err := r.state.AllContainers()
if err != nil {
- return fmt.Errorf("error retrieving all containers from state: %w", err)
+ return fmt.Errorf("retrieving all containers from state: %w", err)
}
pods, err := r.state.AllPods()
if err != nil {
- return fmt.Errorf("error retrieving all pods from state: %w", err)
+ return fmt.Errorf("retrieving all pods from state: %w", err)
}
vols, err := r.state.AllVolumes()
if err != nil {
- return fmt.Errorf("error retrieving all volumes from state: %w", err)
+ return fmt.Errorf("retrieving all volumes from state: %w", err)
}
// No locks are taken during pod, volume, and container refresh.
// Furthermore, the pod/volume/container refresh() functions are not
@@ -977,7 +866,7 @@ func (r *Runtime) refresh(alivePath string) error {
// Create a file indicating the runtime is alive and ready
file, err := os.OpenFile(alivePath, os.O_RDONLY|os.O_CREATE, 0644)
if err != nil {
- return fmt.Errorf("error creating runtime status file: %w", err)
+ return fmt.Errorf("creating runtime status file: %w", err)
}
defer file.Close()
@@ -1141,9 +1030,6 @@ func (r *Runtime) mergeDBConfig(dbConfig *DBConfig) {
logrus.Debugf("Overriding tmp dir %q with %q from database", c.TmpDir, dbConfig.LibpodTmp)
}
c.TmpDir = dbConfig.LibpodTmp
- if c.EventsLogFilePath == "" {
- c.EventsLogFilePath = filepath.Join(dbConfig.LibpodTmp, "events", "events.log")
- }
}
if !r.storageSet.VolumePathSet && dbConfig.VolumePath != "" {
@@ -1208,7 +1094,7 @@ func (r *Runtime) getVolumePlugin(volConfig *VolumeConfig) (*plugin.VolumePlugin
return nil, fmt.Errorf("no volume plugin with name %s available: %w", name, define.ErrMissingPlugin)
}
- return plugin.GetVolumePlugin(name, pluginPath, timeout)
+ return plugin.GetVolumePlugin(name, pluginPath, timeout, r.config)
}
// GetSecretsStorageDir returns the directory that the secrets manager should take
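findConmon and probeConmon were not deleted outright; the same probing logic now lives behind (*config.Config).FindConmon in containers/common, which makeRuntime calls above. A minimal sketch of the replacement, assuming a containers/common version that exports it:

import "github.com/containers/common/pkg/config"

func locateConmon() (string, error) {
	cfg, err := config.Default()
	if err != nil {
		return "", err
	}
	// Iterates the configured conmon paths, version-probes each candidate,
	// and falls back to a $PATH lookup, as the removed findConmon did.
	return cfg.FindConmon()
}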
diff --git a/libpod/runtime_cstorage.go b/libpod/runtime_cstorage.go
index 047375628..372434b49 100644
--- a/libpod/runtime_cstorage.go
+++ b/libpod/runtime_cstorage.go
@@ -39,7 +39,7 @@ func (r *Runtime) ListStorageContainers() ([]*StorageContainer, error) {
// Look up if container is in state
hasCtr, err := r.state.HasContainer(ctr.ID)
if err != nil {
- return nil, fmt.Errorf("error looking up container %s in state: %w", ctr.ID, err)
+ return nil, fmt.Errorf("looking up container %s in state: %w", ctr.ID, err)
}
storageCtr.PresentInLibpod = hasCtr
@@ -64,7 +64,7 @@ func (r *Runtime) RemoveStorageContainer(idOrName string, force bool) error {
if errors.Is(err, storage.ErrLayerUnknown) {
return fmt.Errorf("no container with ID or name %q found: %w", idOrName, define.ErrNoSuchCtr)
}
- return fmt.Errorf("error looking up container %q: %w", idOrName, err)
+ return fmt.Errorf("looking up container %q: %w", idOrName, err)
}
// Lookup returns an ID but it's not guaranteed to be a container ID.
@@ -74,7 +74,7 @@ func (r *Runtime) RemoveStorageContainer(idOrName string, force bool) error {
if errors.Is(err, storage.ErrContainerUnknown) {
return fmt.Errorf("%q does not refer to a container: %w", idOrName, define.ErrNoSuchCtr)
}
- return fmt.Errorf("error retrieving container %q: %w", idOrName, err)
+ return fmt.Errorf("retrieving container %q: %w", idOrName, err)
}
// Error out if the container exists in libpod
@@ -115,7 +115,7 @@ func (r *Runtime) RemoveStorageContainer(idOrName string, force bool) error {
logrus.Infof("Storage for container %s already removed", ctr.ID)
return nil
}
- return fmt.Errorf("error removing storage for container %q: %w", idOrName, err)
+ return fmt.Errorf("removing storage for container %q: %w", idOrName, err)
}
return nil
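The wrapped sentinels above keep the lookup failures distinguishable for callers. A sketch of consuming them (libpod and define imports assumed):

func removeStorageCtr(r *libpod.Runtime, idOrName string) error {
	err := r.RemoveStorageContainer(idOrName, false)
	if errors.Is(err, define.ErrNoSuchCtr) {
		// Nothing in c/storage matched the name or ID.
		return nil
	}
	return err
}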
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index ce0fd869d..7b3cbadfa 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -86,7 +86,7 @@ func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config
ctr, err := r.initContainerVariables(rSpec, config)
if err != nil {
- return nil, fmt.Errorf("error initializing container variables: %w", err)
+ return nil, fmt.Errorf("initializing container variables: %w", err)
}
// For an imported checkpoint no one has ever set the StartedTime. Set it now.
ctr.state.StartedTime = time.Now()
@@ -126,7 +126,7 @@ func (r *Runtime) RenameContainer(ctx context.Context, ctr *Container, newName s
// the config was re-written.
newConf, err := r.state.GetContainerConfig(ctr.ID())
if err != nil {
- return nil, fmt.Errorf("error retrieving container %s configuration from DB to remove: %w", ctr.ID(), err)
+ return nil, fmt.Errorf("retrieving container %s configuration from DB to remove: %w", ctr.ID(), err)
}
ctr.config = newConf
@@ -143,7 +143,7 @@ func (r *Runtime) RenameContainer(ctx context.Context, ctr *Container, newName s
// Set config back to the old name so reflect what is actually
// present in the DB.
ctr.config.Name = oldName
- return nil, fmt.Errorf("error renaming container %s: %w", ctr.ID(), err)
+ return nil, fmt.Errorf("renaming container %s: %w", ctr.ID(), err)
}
// Step 3: rename the container in c/storage.
@@ -169,14 +169,19 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf
ctr.state = new(ContainerState)
if config == nil {
- ctr.config.ID = stringid.GenerateNonCryptoID()
+ ctr.config.ID = stringid.GenerateRandomID()
size, err := units.FromHumanSize(r.config.Containers.ShmSize)
- if err != nil {
- return nil, fmt.Errorf("converting containers.conf ShmSize %s to an int: %w", r.config.Containers.ShmSize, err)
+ if useDevShm {
+ if err != nil {
+ return nil, fmt.Errorf("converting containers.conf ShmSize %s to an int: %w", r.config.Containers.ShmSize, err)
+ }
+ ctr.config.ShmSize = size
+ ctr.config.NoShm = false
+ ctr.config.NoShmShare = false
+ } else {
+ ctr.config.NoShm = true
+ ctr.config.NoShmShare = true
}
- ctr.config.ShmSize = size
- ctr.config.NoShm = false
- ctr.config.NoShmShare = false
ctr.config.StopSignal = 15
ctr.config.StopTimeout = r.config.Engine.StopTimeout
@@ -184,11 +189,11 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf
// This is a restore from an imported checkpoint
ctr.restoreFromCheckpoint = true
if err := JSONDeepCopy(config, ctr.config); err != nil {
- return nil, fmt.Errorf("error copying container config for restore: %w", err)
+ return nil, fmt.Errorf("copying container config for restore: %w", err)
}
// If the ID is empty a new name for the restored container was requested
if ctr.config.ID == "" {
- ctr.config.ID = stringid.GenerateNonCryptoID()
+ ctr.config.ID = stringid.GenerateRandomID()
}
// Reset the log path to point to the default
ctr.config.LogPath = ""
@@ -224,12 +229,12 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ..
ctr, err = r.initContainerVariables(rSpec, nil)
if err != nil {
- return nil, fmt.Errorf("error initializing container variables: %w", err)
+ return nil, fmt.Errorf("initializing container variables: %w", err)
}
for _, option := range options {
if err := option(ctr); err != nil {
- return nil, fmt.Errorf("error running container create option: %w", err)
+ return nil, fmt.Errorf("running container create option: %w", err)
}
}
@@ -296,7 +301,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
// Allocate a lock for the container
lock, err := r.lockManager.AllocateLock()
if err != nil {
- return nil, fmt.Errorf("error allocating lock for new container: %w", err)
+ return nil, fmt.Errorf("allocating lock for new container: %w", err)
}
ctr.lock = lock
ctr.config.LockID = ctr.lock.ID()
@@ -350,7 +355,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
if pod != nil && pod.config.UsePodCgroup && !ctr.IsInfra() {
podCgroup, err := pod.CgroupPath()
if err != nil {
- return nil, fmt.Errorf("error retrieving pod %s cgroup: %w", pod.ID(), err)
+ return nil, fmt.Errorf("retrieving pod %s cgroup: %w", pod.ID(), err)
}
expectPodCgroup, err := ctr.expectPodCgroup()
if err != nil {
@@ -375,7 +380,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
case pod != nil && pod.config.UsePodCgroup && !ctr.IsInfra():
podCgroup, err := pod.CgroupPath()
if err != nil {
- return nil, fmt.Errorf("error retrieving pod %s cgroup: %w", pod.ID(), err)
+ return nil, fmt.Errorf("retrieving pod %s cgroup: %w", pod.ID(), err)
}
ctr.config.CgroupParent = podCgroup
case rootless.IsRootless() && ctr.config.CgroupsMode != cgroupSplit:
@@ -427,7 +432,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
defer func() {
if retErr != nil {
if err := ctr.teardownStorage(); err != nil {
- logrus.Errorf("Removing partially-created container root filesystem: %s", err)
+ logrus.Errorf("Removing partially-created container root filesystem: %v", err)
}
}
}()
@@ -461,7 +466,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
if vol.Name == "" {
// Anonymous volume. We'll need to create it.
// It needs a name first.
- vol.Name = stringid.GenerateNonCryptoID()
+ vol.Name = stringid.GenerateRandomID()
isAnonymous = true
} else {
// Check if it exists already
@@ -471,9 +476,14 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
// The volume exists, we're good
continue
} else if !errors.Is(err, define.ErrNoSuchVolume) {
- return nil, fmt.Errorf("error retrieving named volume %s for new container: %w", vol.Name, err)
+ return nil, fmt.Errorf("retrieving named volume %s for new container: %w", vol.Name, err)
}
}
+ if vol.IsAnonymous {
+ // If IsAnonymous is set, make this an anonymous volume;
+ // this is needed for emptyDir volumes from kube yamls.
+ isAnonymous = true
+ }
logrus.Debugf("Creating new volume %s for container", vol.Name)
@@ -504,7 +514,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
}
newVol, err := r.newVolume(false, volOptions...)
if err != nil {
- return nil, fmt.Errorf("error creating named volume %q: %w", vol.Name, err)
+ return nil, fmt.Errorf("creating named volume %q: %w", vol.Name, err)
}
ctrNamedVolumes = append(ctrNamedVolumes, newVol)
@@ -523,7 +533,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
}
}
- if !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" && !ctr.config.NoShm {
+ if useDevShm && !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" && !ctr.config.NoShm {
ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm")
if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil {
if !os.IsExist(err) {
@@ -571,7 +581,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
// be removed also if and only if the container is the sole user
// Otherwise, RemoveContainer will return an error if the container is running
func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error {
- return r.removeContainer(ctx, c, force, removeVolume, false, timeout)
+ return r.removeContainer(ctx, c, force, removeVolume, false, false, timeout)
}
// Internal function to remove a container.
@@ -579,7 +589,9 @@ func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool,
// removePod is used only when removing pods. It instructs Podman to ignore
// infra container protections, and *not* remove from the database (as pod
// remove will handle that).
-func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod bool, timeout *uint) error {
+// ignoreDeps is *DANGEROUS* and should not be used outside of a very specific
+// context (alternate pod removal code, where graph traversal is not possible).
+func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps bool, timeout *uint) error {
if !c.valid {
if ok, _ := r.state.HasContainer(c.ID()); !ok {
// Container probably already removed
@@ -596,7 +608,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// exist once we're done.
newConf, err := r.state.GetContainerConfig(c.ID())
if err != nil {
- return fmt.Errorf("error retrieving container %s configuration from DB to remove: %w", c.ID(), err)
+ return fmt.Errorf("retrieving container %s configuration from DB to remove: %w", c.ID(), err)
}
c.config = newConf
@@ -608,25 +620,27 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// pod.
var pod *Pod
runtime := c.runtime
- if c.config.Pod != "" && !removePod {
+ if c.config.Pod != "" {
pod, err = r.state.Pod(c.config.Pod)
if err != nil {
return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), pod.ID(), err)
}
- // Lock the pod while we're removing container
- if pod.config.LockID == c.config.LockID {
- return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
- }
- pod.lock.Lock()
- defer pod.lock.Unlock()
- if err := pod.updatePod(); err != nil {
- return err
- }
+ if !removePod {
+ // Lock the pod while we're removing container
+ if pod.config.LockID == c.config.LockID {
+ return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
+ }
+ pod.lock.Lock()
+ defer pod.lock.Unlock()
+ if err := pod.updatePod(); err != nil {
+ return err
+ }
- infraID := pod.state.InfraContainerID
- if c.ID() == infraID {
- return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID())
+ infraID := pod.state.InfraContainerID
+ if c.ID() == infraID {
+ return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID())
+ }
}
}
@@ -686,7 +700,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Check that no other containers depend on the container.
// Only used if not removing a pod - pods guarantee that all
// deps will be evicted at the same time.
- if !removePod {
+ if !ignoreDeps {
deps, err := r.state.ContainerInUse(c)
if err != nil {
return err
@@ -717,7 +731,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
if ok, _ := r.state.HasContainer(c.ID()); !ok {
// When the container has already been removed, the OCI runtime directory remains.
if err := c.cleanupRuntime(ctx); err != nil {
- return fmt.Errorf("error cleaning up container %s from OCI runtime: %w", c.ID(), err)
+ return fmt.Errorf("cleaning up container %s from OCI runtime: %w", c.ID(), err)
}
return nil
}
@@ -729,7 +743,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Do this before we set ContainerStateRemoving, to ensure that we can
// actually remove from the OCI runtime.
if err := c.cleanup(ctx); err != nil {
- cleanupErr = fmt.Errorf("error cleaning up container %s: %w", c.ID(), err)
+ cleanupErr = fmt.Errorf("cleaning up container %s: %w", c.ID(), err)
}
// Set ContainerStateRemoving
@@ -767,13 +781,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
if c.config.Pod != "" {
// If we're removing the pod, the container will be evicted
// from the state elsewhere
- if !removePod {
- if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
- if cleanupErr == nil {
- cleanupErr = err
- } else {
- logrus.Errorf("Removing container %s from database: %v", c.ID(), err)
- }
+ if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
+ if cleanupErr == nil {
+ cleanupErr = err
+ } else {
+ logrus.Errorf("Removing container %s from database: %v", c.ID(), err)
}
}
} else {
@@ -788,8 +800,8 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Deallocate the container's lock
if err := c.lock.Free(); err != nil {
- if cleanupErr == nil {
- cleanupErr = fmt.Errorf("error freeing lock for container %s: %w", c.ID(), err)
+ if cleanupErr == nil && !os.IsNotExist(err) {
+ cleanupErr = fmt.Errorf("freeing lock for container %s: %w", c.ID(), err)
} else {
logrus.Errorf("Free container lock: %v", err)
}
@@ -814,11 +826,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Ignore error, since podman will report original error
volumesFrom, _ := c.volumesFrom()
if len(volumesFrom) > 0 {
- logrus.Debugf("Cleaning up volume not possible since volume is in use (%s)", v)
+ logrus.Debugf("Cleaning up volume not possible since volume is in use (%s)", v.Name)
continue
}
}
- logrus.Errorf("Cleaning up volume (%s): %v", v, err)
+ logrus.Errorf("Cleaning up volume (%s): %v", v.Name, err)
}
}
}
@@ -862,7 +874,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
if err == nil {
logrus.Infof("Container %s successfully retrieved from state, attempting normal removal", id)
// Assume force = true for the evict case
- err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, timeout)
+ err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, timeout)
if !tmpCtr.valid {
// If the container is marked invalid, remove succeeded
// in kicking it out of the state - no need to continue.
@@ -968,7 +980,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
continue
}
if err := r.removeVolume(ctx, volume, false, timeout, false); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed {
- logrus.Errorf("Cleaning up volume (%s): %v", v, err)
+ logrus.Errorf("Cleaning up volume (%s): %v", v.Name, err)
}
}
}
@@ -995,7 +1007,7 @@ func (r *Runtime) RemoveDepend(ctx context.Context, rmCtr *Container, force bool
return nil, err
}
for _, cID := range podContainerIDS {
- rmReports = append(rmReports, &reports.RmReport{Id: cID})
+ rmReports = append(rmReports, &reports.RmReport{Id: cID, RawInput: cID})
}
return rmReports, nil
}
@@ -1023,8 +1035,8 @@ func (r *Runtime) RemoveDepend(ctx context.Context, rmCtr *Container, force bool
rmReports = append(rmReports, reports...)
}
- report := reports.RmReport{Id: rmCtr.ID()}
- report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, timeout)
+ report := reports.RmReport{Id: rmCtr.ID(), RawInput: rmCtr.ID()}
+ report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, false, timeout)
return append(rmReports, &report), nil
}
@@ -1217,7 +1229,7 @@ func (r *Runtime) MountStorageContainer(id string) (string, error) {
}
mountPoint, err := r.store.Mount(container.ID, "")
if err != nil {
- return "", fmt.Errorf("error mounting storage for container %s: %w", id, err)
+ return "", fmt.Errorf("mounting storage for container %s: %w", id, err)
}
return mountPoint, nil
}
@@ -1265,7 +1277,7 @@ func (r *Runtime) StorageContainers() ([]storage.Container, error) {
storeContainers, err := r.store.Containers()
if err != nil {
- return nil, fmt.Errorf("error reading list of all storage containers: %w", err)
+ return nil, fmt.Errorf("reading list of all storage containers: %w", err)
}
retCtrs := []storage.Container{}
for _, container := range storeContainers {
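The internal removeContainer now carries two easily-confused booleans, removePod and ignoreDeps, but the exported surface above is unchanged. An illustrative call of the public wrapper; a nil timeout means the container's configured stop timeout applies (imports assumed):

func removeCtr(ctx context.Context, r *libpod.Runtime, ctr *libpod.Container) error {
	var timeout *uint // nil: use the container's configured stop timeout
	err := r.RemoveContainer(ctx, ctr, true /*force*/, true /*removeVolume*/, timeout)
	if errors.Is(err, define.ErrWillDeadlock) {
		logrus.Error("Container and pod share a lock; remove the pod instead")
	}
	return err
}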
diff --git a/libpod/runtime_ctr_freebsd.go b/libpod/runtime_ctr_freebsd.go
new file mode 100644
index 000000000..a8870a38c
--- /dev/null
+++ b/libpod/runtime_ctr_freebsd.go
@@ -0,0 +1,5 @@
+package libpod
+
+const (
+ useDevShm = false
+)
diff --git a/libpod/runtime_ctr_linux.go b/libpod/runtime_ctr_linux.go
new file mode 100644
index 000000000..7812d8238
--- /dev/null
+++ b/libpod/runtime_ctr_linux.go
@@ -0,0 +1,5 @@
+package libpod
+
+const (
+ useDevShm = true
+)
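These two one-constant files are the whole porting mechanism for the /dev/shm behavior: the _linux.go and _freebsd.go filename suffixes act as implicit build constraints, so the common code in runtime_ctr.go can test useDevShm without runtime.GOOS checks. A hypothetical stub for a third platform, written with explicit tags instead of a filename suffix, would look like:

//go:build !linux && !freebsd
// +build !linux,!freebsd

package libpod

// Hypothetical: platforms without a tmpfs-backed /dev/shm.
const useDevShm = false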
diff --git a/libpod/runtime_img.go b/libpod/runtime_img.go
index d04607d2e..dacbd752f 100644
--- a/libpod/runtime_img.go
+++ b/libpod/runtime_img.go
@@ -47,7 +47,7 @@ func (r *Runtime) RemoveContainersForImageCallback(ctx context.Context) libimage
return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err)
}
} else {
- if err := r.removeContainer(ctx, ctr, true, false, false, timeout); err != nil {
+ if err := r.removeContainer(ctx, ctr, true, false, false, false, timeout); err != nil {
return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err)
}
}
@@ -107,7 +107,7 @@ func (r *Runtime) Build(ctx context.Context, options buildahDefine.BuildOptions,
func DownloadFromFile(reader *os.File) (string, error) {
outFile, err := ioutil.TempFile(util.Tmpdir(), "import")
if err != nil {
- return "", fmt.Errorf("error creating file: %w", err)
+ return "", fmt.Errorf("creating file: %w", err)
}
defer outFile.Close()
@@ -115,7 +115,7 @@ func DownloadFromFile(reader *os.File) (string, error) {
_, err = io.Copy(outFile, reader)
if err != nil {
- return "", fmt.Errorf("error saving %s to %s: %w", reader.Name(), outFile.Name(), err)
+ return "", fmt.Errorf("saving %s to %s: %w", reader.Name(), outFile.Name(), err)
}
return outFile.Name(), nil
diff --git a/libpod/runtime_migrate.go b/libpod/runtime_migrate.go
index 139638a6b..36901d4d0 100644
--- a/libpod/runtime_migrate.go
+++ b/libpod/runtime_migrate.go
@@ -92,7 +92,7 @@ func (r *Runtime) migrate() error {
if needsWrite {
if err := r.state.RewriteContainerConfig(ctr, ctr.config); err != nil {
- return fmt.Errorf("error rewriting config for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("rewriting config for container %s: %w", ctr.ID(), err)
}
}
}
diff --git a/libpod/runtime_migrate_unsupported.go b/libpod/runtime_migrate_unsupported.go
new file mode 100644
index 000000000..77c2737a9
--- /dev/null
+++ b/libpod/runtime_migrate_unsupported.go
@@ -0,0 +1,16 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+)
+
+func (r *Runtime) stopPauseProcess() error {
+ return errors.New("not implemented (*Runtime) stopPauseProcess")
+}
+
+func (r *Runtime) migrate() error {
+ return errors.New("not implemented (*Runtime) migrate")
+}
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 57c0b5c48..24e9f3da7 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -17,6 +17,7 @@ import (
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/specgen"
+ "github.com/hashicorp/go-multierror"
"github.com/sirupsen/logrus"
)
@@ -36,14 +37,14 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
for _, option := range options {
if err := option(pod); err != nil {
- return nil, fmt.Errorf("error running pod create option: %w", err)
+ return nil, fmt.Errorf("running pod create option: %w", err)
}
}
// Allocate a lock for the pod
lock, err := r.lockManager.AllocateLock()
if err != nil {
- return nil, fmt.Errorf("error allocating lock for new pod: %w", err)
+ return nil, fmt.Errorf("allocating lock for new pod: %w", err)
}
pod.lock = lock
pod.config.LockID = pod.lock.ID()
@@ -160,7 +161,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
}
}
if addPodErr != nil {
- return nil, fmt.Errorf("error adding pod to state: %w", addPodErr)
+ return nil, fmt.Errorf("adding pod to state: %w", addPodErr)
}
return pod, nil
@@ -191,29 +192,9 @@ func (r *Runtime) SavePod(pod *Pod) error {
return nil
}
-func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error {
- if err := p.updatePod(); err != nil {
- return err
- }
-
- ctrs, err := r.state.PodContainers(p)
- if err != nil {
- return err
- }
- numCtrs := len(ctrs)
-
- // If the only running container in the pod is the pause container, remove the pod and container unconditionally.
- pauseCtrID := p.state.InfraContainerID
- if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
- removeCtrs = true
- force = true
- }
- if !removeCtrs && numCtrs > 0 {
- return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists)
- }
-
- ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
-
+// DO NOT USE THIS FUNCTION DIRECTLY. Use removePod(), below. It will call
+// removeMalformedPod() if necessary.
+func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Container, force bool, timeout *uint, ctrNamedVolumes map[string]*ContainerNamedVolume) error {
var removalErr error
for _, ctr := range ctrs {
err := func() error {
@@ -231,7 +212,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
ctrNamedVolumes[vol.Name] = vol
}
- return r.removeContainer(ctx, ctr, force, false, true, timeout)
+ return r.removeContainer(ctx, ctr, force, false, true, true, timeout)
}()
if removalErr == nil {
@@ -261,6 +242,69 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
return err
}
+ return nil
+}
+
+func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error {
+ if err := p.updatePod(); err != nil {
+ return err
+ }
+
+ ctrs, err := r.state.PodContainers(p)
+ if err != nil {
+ return err
+ }
+ numCtrs := len(ctrs)
+
+ // If the only running container in the pod is the pause container, remove the pod and container unconditionally.
+ pauseCtrID := p.state.InfraContainerID
+ if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
+ removeCtrs = true
+ force = true
+ }
+ if !removeCtrs && numCtrs > 0 {
+ return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists)
+ }
+
+ var removalErr error
+ ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
+
+ // Build a graph of all containers in the pod.
+ graph, err := BuildContainerGraph(ctrs)
+ if err != nil {
+ // We have to allow the pod to be removed.
+ // But let's only do it if force is set.
+ if !force {
+ return fmt.Errorf("cannot create container graph for pod %s: %w", p.ID(), err)
+ }
+
+ removalErr = fmt.Errorf("creating container graph for pod %s failed, fell back to loop removal: %w", p.ID(), err)
+
+ if err := r.removeMalformedPod(ctx, p, ctrs, force, timeout, ctrNamedVolumes); err != nil {
+ logrus.Errorf("Error creating container graph for pod %s: %v. Falling back to loop removal.", p.ID(), err)
+ return err
+ }
+ } else {
+ ctrErrors := make(map[string]error)
+ ctrsVisited := make(map[string]bool)
+
+ for _, node := range graph.notDependedOnNodes {
+ removeNode(ctx, node, p, force, timeout, false, ctrErrors, ctrsVisited, ctrNamedVolumes)
+ }
+
+ // This is gross, but I don't want to change the signature on
+ // removePod - especially since any change here eventually has
+ // to map down to one error unless we want to make a breaking
+ // API change.
+ if len(ctrErrors) > 0 {
+ var allErrs error
+ for id, err := range ctrErrors {
+ allErrs = multierror.Append(allErrs, fmt.Errorf("removing container %s from pod %s: %w", id, p.ID(), err))
+ }
+ return allErrs
+ }
+ }
+
for volName := range ctrNamedVolumes {
volume, err := r.state.Volume(volName)
if err != nil && !errors.Is(err, define.ErrNoSuchVolume) {
@@ -286,7 +330,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
case config.SystemdCgroupsManager:
if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil {
if removalErr == nil {
- removalErr = fmt.Errorf("error removing pod %s cgroup: %w", p.ID(), err)
+ removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
@@ -300,7 +344,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
conmonCgroup, err := cgroups.Load(conmonCgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
if removalErr == nil {
- removalErr = fmt.Errorf("error retrieving pod %s conmon cgroup: %w", p.ID(), err)
+ removalErr = fmt.Errorf("retrieving pod %s conmon cgroup: %w", p.ID(), err)
} else {
logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
}
@@ -308,7 +352,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
if err == nil {
if err = conmonCgroup.Delete(); err != nil {
if removalErr == nil {
- removalErr = fmt.Errorf("error removing pod %s conmon cgroup: %w", p.ID(), err)
+ removalErr = fmt.Errorf("removing pod %s conmon cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
}
@@ -317,7 +361,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
cgroup, err := cgroups.Load(p.state.CgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
if removalErr == nil {
- removalErr = fmt.Errorf("error retrieving pod %s cgroup: %w", p.ID(), err)
+ removalErr = fmt.Errorf("retrieving pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
@@ -325,7 +369,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
if err == nil {
if err := cgroup.Delete(); err != nil {
if removalErr == nil {
- removalErr = fmt.Errorf("error removing pod %s cgroup: %w", p.ID(), err)
+ removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
@@ -362,7 +406,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
// Deallocate the pod lock
if err := p.lock.Free(); err != nil {
if removalErr == nil {
- removalErr = fmt.Errorf("error freeing pod %s lock: %w", p.ID(), err)
+ removalErr = fmt.Errorf("freeing pod %s lock: %w", p.ID(), err)
} else {
logrus.Errorf("Freeing pod %s lock: %v", p.ID(), err)
}
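removePod now returns a hashicorp/go-multierror aggregate when graph-based removal partially fails. A hedged fragment, inside the package (removePod is unexported), showing how the aggregate unpacks:

// Inside the libpod package.
if err := r.removePod(ctx, p, true, true, nil); err != nil {
	var merr *multierror.Error
	if errors.As(err, &merr) { // aggregate from graph-based removal
		for _, e := range merr.Errors {
			logrus.Error(e)
		}
	}
	return err
}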
diff --git a/libpod/runtime_pod_unsupported.go b/libpod/runtime_pod_unsupported.go
new file mode 100644
index 000000000..0c7ff8655
--- /dev/null
+++ b/libpod/runtime_pod_unsupported.go
@@ -0,0 +1,30 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "context"
+ "errors"
+
+ "github.com/containers/podman/v4/pkg/specgen"
+)
+
+// NewPod makes a new, empty pod
+func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, options ...PodCreateOption) (_ *Pod, deferredErr error) {
+ return nil, errors.New("not implemented (*Runtime) NewPod")
+}
+
+// AddInfra adds the created infra container to the pod state
+func (r *Runtime) AddInfra(ctx context.Context, pod *Pod, infraCtr *Container) (*Pod, error) {
+ return nil, errors.New("not implemented (*Runtime) AddInfra")
+}
+
+// SavePod is a helper function to save the pod state from outside of libpod
+func (r *Runtime) SavePod(pod *Pod) error {
+ return errors.New("not implemented (*Runtime) SavePod")
+}
+
+func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error {
+ return errors.New("not implemented (*Runtime) removePod")
+}
diff --git a/libpod/runtime_renumber.go b/libpod/runtime_renumber.go
index 9149dd72f..ff70081d8 100644
--- a/libpod/runtime_renumber.go
+++ b/libpod/runtime_renumber.go
@@ -27,7 +27,7 @@ func (r *Runtime) renumberLocks() error {
for _, ctr := range allCtrs {
lock, err := r.lockManager.AllocateLock()
if err != nil {
- return fmt.Errorf("error allocating lock for container %s: %w", ctr.ID(), err)
+ return fmt.Errorf("allocating lock for container %s: %w", ctr.ID(), err)
}
ctr.config.LockID = lock.ID()
@@ -44,7 +44,7 @@ func (r *Runtime) renumberLocks() error {
for _, pod := range allPods {
lock, err := r.lockManager.AllocateLock()
if err != nil {
- return fmt.Errorf("error allocating lock for pod %s: %w", pod.ID(), err)
+ return fmt.Errorf("allocating lock for pod %s: %w", pod.ID(), err)
}
pod.config.LockID = lock.ID()
@@ -61,7 +61,7 @@ func (r *Runtime) renumberLocks() error {
for _, vol := range allVols {
lock, err := r.lockManager.AllocateLock()
if err != nil {
- return fmt.Errorf("error allocating lock for volume %s: %w", vol.Name(), err)
+ return fmt.Errorf("allocating lock for volume %s: %w", vol.Name(), err)
}
vol.config.LockID = lock.ID()
diff --git a/libpod/runtime_test.go b/libpod/runtime_test.go
new file mode 100644
index 000000000..2e16c7fcd
--- /dev/null
+++ b/libpod/runtime_test.go
@@ -0,0 +1,28 @@
+package libpod
+
+import (
+ "math/rand"
+ "os"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func Test_generateName(t *testing.T) {
+ state, path, _, err := getEmptyBoltState()
+ assert.NoError(t, err)
+ defer os.RemoveAll(path)
+ defer state.Close()
+
+ r := &Runtime{
+ state: state,
+ }
+
+ // Test that (*Runtime).generateName returns different names
+ // if called twice, even if the global RNG has the default
+ // seed.
+ n1, _ := r.generateName()
+ rand.Seed(1)
+ n2, _ := r.generateName()
+ assert.NotEqual(t, n1, n2)
+}
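The test leans on the fact that namesgenerator draws from math/rand's shared global source: with the same seed, two runtimes would produce the same name sequence, which is exactly what the new init() in runtime.go guards against. An illustrative fragment, assuming docker's namesgenerator package:

import (
	"fmt"
	"math/rand"
	"time"

	"github.com/docker/docker/pkg/namesgenerator"
)

func demo() {
	rand.Seed(1)
	fmt.Println(namesgenerator.GetRandomName(0)) // deterministic for a fixed seed
	rand.Seed(time.Now().UnixNano())
	fmt.Println(namesgenerator.GetRandomName(0)) // now effectively random
}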
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index 1f354e41b..08fdbf977 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -42,7 +42,7 @@ func (r *Runtime) newVolume(noCreatePluginVolume bool, options ...VolumeCreateOp
}
if volume.config.Name == "" {
- volume.config.Name = stringid.GenerateNonCryptoID()
+ volume.config.Name = stringid.GenerateRandomID()
}
if volume.config.Driver == "" {
volume.config.Driver = define.VolumeDriverLocal
@@ -184,7 +184,7 @@ func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload
)
for driverName, socket := range r.config.Engine.VolumePlugins {
- driver, err := volplugin.GetVolumePlugin(driverName, socket, 0)
+ driver, err := volplugin.GetVolumePlugin(driverName, socket, nil, r.config)
if err != nil {
errs = append(errs, err)
continue
@@ -324,7 +324,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo
logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name())
- if err := r.removeContainer(ctx, ctr, force, false, false, timeout); err != nil {
+ if err := r.removeContainer(ctx, ctr, force, false, false, false, timeout); err != nil {
return fmt.Errorf("removing container %s that depends on volume %s: %w", ctr.ID(), v.Name(), err)
}
}
diff --git a/libpod/runtime_volume_unsupported.go b/libpod/runtime_volume_unsupported.go
new file mode 100644
index 000000000..c2816b817
--- /dev/null
+++ b/libpod/runtime_volume_unsupported.go
@@ -0,0 +1,42 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "context"
+ "errors"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+// NewVolume creates a new empty volume
+func (r *Runtime) NewVolume(ctx context.Context, options ...VolumeCreateOption) (*Volume, error) {
+ if !r.valid {
+ return nil, define.ErrRuntimeStopped
+ }
+ return r.newVolume(false, options...)
+}
+
+// NewVolume creates a new empty volume
+func (r *Runtime) newVolume(noCreatePluginVolume bool, options ...VolumeCreateOption) (*Volume, error) {
+ return nil, errors.New("not implemented (*Runtime) newVolume")
+}
+
+// UpdateVolumePlugins reads all volumes from all configured volume plugins and
+// imports them into the libpod db. It also checks if existing libpod volumes
+// were removed in the plugin; in that case we try to remove them from libpod.
+// On errors we continue and try to do as much as possible; all errors are
+// returned as an array in the returned struct.
+// This function has many race conditions; it is best-effort and cannot guarantee
+// a perfect state, since plugins can be modified from the outside at any time.
+func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload {
+ return nil
+}
+
+// removeVolume removes the specified volume from state, as well as tearing down its mountpoint and storage.
+// ignoreVolumePlugin is used to only remove the volume from the db and not the plugin,
+// this is required when the volume was already removed from the plugin, i.e. in UpdateVolumePlugins().
+func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeout *uint, ignoreVolumePlugin bool) error {
+ return errors.New("not implemented (*Runtime) removeVolume")
+}
diff --git a/libpod/stats_common.go b/libpod/stats_common.go
new file mode 100644
index 000000000..122160bda
--- /dev/null
+++ b/libpod/stats_common.go
@@ -0,0 +1,49 @@
+//go:build linux || freebsd
+// +build linux freebsd
+
+package libpod
+
+import (
+ "fmt"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+// GetContainerStats gets the running stats for a given container.
+// The previousStats is used to correctly calculate cpu percentages. You
+// should pass nil if there is no previous stat for this container.
+func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*define.ContainerStats, error) {
+ stats := new(define.ContainerStats)
+ stats.ContainerID = c.ID()
+ stats.Name = c.Name()
+
+ if c.config.NoCgroups {
+ return nil, fmt.Errorf("cannot run top on container %s as it did not create a cgroup: %w", c.ID(), define.ErrNoCgroups)
+ }
+
+ if !c.batched {
+ c.lock.Lock()
+ defer c.lock.Unlock()
+ if err := c.syncContainer(); err != nil {
+ return stats, err
+ }
+ }
+
+ // If the container is neither running nor paused, return stats with the fields' zero values.
+ if c.state.State != define.ContainerStateRunning && c.state.State != define.ContainerStatePaused {
+ return stats, nil
+ }
+
+ if previousStats == nil {
+ previousStats = &define.ContainerStats{
+ // if we have no prev stats use the container start time as prev time
+ // otherwise we cannot correctly calculate the CPU percentage
+ SystemNano: uint64(c.state.StartedTime.UnixNano()),
+ }
+ }
+
+ if err := c.getPlatformContainerStats(stats, previousStats); err != nil {
+ return nil, err
+ }
+ return stats, nil
+}
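The previousStats parameter is what makes the CPU percentage an interval measurement. A sketch of the intended polling loop, feeding each sample back in as the next call's baseline (imports assumed):

func pollStats(ctr *libpod.Container) error {
	var prev *define.ContainerStats // nil on the first call
	for i := 0; i < 5; i++ {
		cur, err := ctr.GetContainerStats(prev)
		if err != nil {
			return err
		}
		fmt.Printf("%s: %.2f%% CPU, %d bytes mem\n", cur.Name, cur.CPU, cur.MemUsage)
		prev = cur
		time.Sleep(time.Second)
	}
	return nil
}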
diff --git a/libpod/stats_freebsd.go b/libpod/stats_freebsd.go
new file mode 100644
index 000000000..53bc3f19a
--- /dev/null
+++ b/libpod/stats_freebsd.go
@@ -0,0 +1,153 @@
+package libpod
+
+import (
+ "fmt"
+ "math"
+ "strings"
+ "time"
+
+ "github.com/containers/common/pkg/cgroups"
+ "github.com/containers/podman/v4/libpod/define"
+ "github.com/containers/podman/v4/pkg/rctl"
+ "github.com/containers/storage/pkg/system"
+ "github.com/sirupsen/logrus"
+)
+
+// getPlatformContainerStats gets the platform-specific running stats
+// for a given container. The previousStats is used to correctly
+// calculate cpu percentages. You should pass nil if there is no
+// previous stat for this container.
+func (c *Container) getPlatformContainerStats(stats *define.ContainerStats, previousStats *define.ContainerStats) error {
+ now := uint64(time.Now().UnixNano())
+
+ jailName := c.ID()
+ if c.state.NetNS != nil {
+ jailName = c.state.NetNS.Name + "." + jailName
+ }
+ entries, err := rctl.GetRacct("jail:" + jailName)
+ if err != nil {
+ return fmt.Errorf("unable to read accounting for %s: %w", jailName, err)
+ }
+
+ // If the current total usage is less than what was previously
+ // recorded then it means the container was restarted and runs
+ // in a new jail
+ if dur, ok := entries["wallclock"]; ok {
+ if previousStats.Duration > dur*1000000000 {
+ previousStats = &define.ContainerStats{}
+ }
+ }
+
+ for key, val := range entries {
+ switch key {
+ case "cputime": // CPU time, in seconds
+ stats.CPUNano = val * 1000000000
+ stats.AvgCPU = calculateCPUPercent(stats.CPUNano, 0, now, uint64(c.state.StartedTime.UnixNano()))
+ case "datasize": // data size, in bytes
+ case "stacksize": // stack size, in bytes
+ case "coredumpsize": // core dump size, in bytes
+ case "memoryuse": // resident set size, in bytes
+ stats.MemUsage = val
+ case "memorylocked": // locked memory, in bytes
+ case "maxproc": // number of processes
+ stats.PIDs = val
+ case "openfiles": // file descriptor table size
+ case "vmemoryuse": // address space limit, in bytes
+ case "pseudoterminals": // number of PTYs
+ case "swapuse": // swap space that may be reserved or used, in bytes
+ case "nthr": // number of threads
+ case "msgqqueued": // number of queued SysV messages
+ case "msgqsize": // SysV message queue size, in bytes
+ case "nmsgq": // number of SysV message queues
+ case "nsem": // number of SysV semaphores
+ case "nsemop": // number of SysV semaphores modified in a single semop(2) call
+ case "nshm": // number of SysV shared memory segments
+ case "shmsize": // SysV shared memory size, in bytes
+ case "wallclock": // wallclock time, in seconds
+ stats.Duration = val * 1000000000
+ stats.UpTime = time.Duration(stats.Duration)
+ case "pcpu": // %CPU, in percents of a single CPU core
+ stats.CPU = float64(val)
+ case "readbps": // filesystem reads, in bytes per second
+ stats.BlockInput = val
+ case "writebps": // filesystem writes, in bytes per second
+ stats.BlockOutput = val
+ case "readiops": // filesystem reads, in operations per second
+ case "writeiops": // filesystem writes, in operations per second
+ }
+ }
+ stats.MemLimit = c.getMemLimit()
+ stats.SystemNano = now
+
+ netStats, err := getContainerNetIO(c)
+ if err != nil {
+ return err
+ }
+
+ // Handle case where the container is not in a network namespace
+ if netStats != nil {
+ stats.NetInput = netStats.TxBytes
+ stats.NetOutput = netStats.RxBytes
+ } else {
+ stats.NetInput = 0
+ stats.NetOutput = 0
+ }
+
+ return nil
+}
+
+// getMemLimit returns the memory limit for a container
+func (c *Container) getMemLimit() uint64 {
+ memLimit := uint64(math.MaxUint64)
+
+ if c.config.Spec.Linux != nil && c.config.Spec.Linux.Resources != nil &&
+ c.config.Spec.Linux.Resources.Memory != nil && c.config.Spec.Linux.Resources.Memory.Limit != nil {
+ memLimit = uint64(*c.config.Spec.Linux.Resources.Memory.Limit)
+ }
+
+ mi, err := system.ReadMemInfo()
+ if err != nil {
+ logrus.Errorf("ReadMemInfo error: %v", err)
+ return 0
+ }
+
+ //nolint:unconvert
+ physicalLimit := uint64(mi.MemTotal)
+
+ if memLimit <= 0 || memLimit > physicalLimit {
+ return physicalLimit
+ }
+
+ return memLimit
+}
+
+// calculateCPUPercent calculates the cpu usage from the latest measurement.
+// previousCPU is the previous CPU total, in nanoseconds, measured at the time
+// previousSystem. (now - previousSystem) is the time delta in nanoseconds
+// between that measurement and the one in currentCPU.
+func calculateCPUPercent(currentCPU, previousCPU, now, previousSystem uint64) float64 {
+ var (
+ cpuPercent = 0.0
+ cpuDelta = float64(currentCPU - previousCPU)
+ systemDelta = float64(now - previousSystem)
+ )
+ if systemDelta > 0.0 && cpuDelta > 0.0 {
+ // gets a ratio of container cpu usage total, and multiplies that by 100 to get a percentage
+ cpuPercent = (cpuDelta / systemDelta) * 100
+ }
+ return cpuPercent
+}
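
A quick worked example of the formula (illustrative only; it uses the FreeBSD signature above, where every argument is a plain nanosecond counter):

    const second = uint64(1e9)

    started := uint64(0)  // baseline: the container's start time
    now := 10 * second    // sampled 10 seconds later
    cpuNano := 2 * second // 2 seconds of CPU time consumed in that window

    // (2e9 / 10e9) * 100 = 20.0, i.e. 20% of a single core
    pct := calculateCPUPercent(cpuNano, 0, now, started)
    fmt.Println(pct) // 20
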
+
+func calculateBlockIO(stats *cgroups.Metrics) (read uint64, write uint64) {
+ for _, blkIOEntry := range stats.Blkio.IoServiceBytesRecursive {
+ switch strings.ToLower(blkIOEntry.Op) {
+ case "read":
+ read += blkIOEntry.Value
+ case "write":
+ write += blkIOEntry.Value
+ }
+ }
+ return
+}
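
For readers unfamiliar with FreeBSD resource accounting: the entries map consumed above comes straight from the kernel's racct subsystem via rctl. A minimal standalone sketch of the same call (the jail name is hypothetical, and racct must be enabled with kern.racct.enable=1):

    package main

    import (
        "fmt"

        "github.com/containers/podman/v4/pkg/rctl"
    )

    func main() {
        // Keys mirror rctl(8) resource names: cputime (seconds),
        // memoryuse (bytes), wallclock (seconds), pcpu (percent), ...
        entries, err := rctl.GetRacct("jail:mycontainer")
        if err != nil {
            panic(err)
        }
        for key, val := range entries {
            fmt.Printf("%s = %d\n", key, val)
        }
    }
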
diff --git a/libpod/stats.go b/libpod/stats_linux.go
index c7e9e5128..ad8f33c91 100644
--- a/libpod/stats.go
+++ b/libpod/stats_linux.go
@@ -16,57 +16,33 @@ import (
"github.com/containers/podman/v4/libpod/define"
)
-// GetContainerStats gets the running stats for a given container.
-// The previousStats is used to correctly calculate cpu percentages. You
-// should pass nil if there is no previous stat for this container.
-func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*define.ContainerStats, error) {
- stats := new(define.ContainerStats)
- stats.ContainerID = c.ID()
- stats.Name = c.Name()
-
+// getPlatformContainerStats gets the platform-specific running stats
+// for a given container. The previousStats is used to correctly
+// calculate cpu percentages. You should pass nil if there is no
+// previous stat for this container.
+func (c *Container) getPlatformContainerStats(stats *define.ContainerStats, previousStats *define.ContainerStats) error {
if c.config.NoCgroups {
- return nil, fmt.Errorf("cannot run top on container %s as it did not create a cgroup: %w", c.ID(), define.ErrNoCgroups)
- }
-
- if !c.batched {
- c.lock.Lock()
- defer c.lock.Unlock()
- if err := c.syncContainer(); err != nil {
- return stats, err
- }
- }
-
- // returns stats with the fields' default values respective of their type
- if c.state.State != define.ContainerStateRunning && c.state.State != define.ContainerStatePaused {
- return stats, nil
- }
-
- if previousStats == nil {
- previousStats = &define.ContainerStats{
- // if we have no prev stats use the container start time as prev time
- // otherwise we cannot correctly calculate the CPU percentage
- SystemNano: uint64(c.state.StartedTime.UnixNano()),
- }
+ return fmt.Errorf("cannot run top on container %s as it did not create a cgroup: %w", c.ID(), define.ErrNoCgroups)
}
cgroupPath, err := c.cGroupPath()
if err != nil {
- return nil, err
+ return err
}
cgroup, err := cgroups.Load(cgroupPath)
if err != nil {
- return stats, fmt.Errorf("unable to load cgroup at %s: %w", cgroupPath, err)
+ return fmt.Errorf("unable to load cgroup at %s: %w", cgroupPath, err)
}
// Ubuntu does not have swap memory in cgroups because swap is often not enabled.
cgroupStats, err := cgroup.Stat()
if err != nil {
- return stats, fmt.Errorf("unable to obtain cgroup stats: %w", err)
+ return fmt.Errorf("unable to obtain cgroup stats: %w", err)
}
conState := c.state.State
netStats, err := getContainerNetIO(c)
if err != nil {
- return nil, err
+ return err
}
// If the current total usage in the cgroup is less than what was previously
@@ -103,7 +79,7 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de
stats.NetOutput = 0
}
- return stats, nil
+ return nil
}
// getMemory limit returns the memory limit for a container
@@ -133,7 +109,9 @@ func (c *Container) getMemLimit() uint64 {
// calculateCPUPercent calculates the cpu usage using the latest measurement in stats.
// previousCPU is the last value of stats.CPU.Usage.Total measured at the time previousSystem.
-// (now - previousSystem) is the time delta in nanoseconds, between the measurement in previousCPU
+//
+// (now - previousSystem) is the time delta in nanoseconds, between the measurement in previousCPU
+//
// and the updated value in stats.
func calculateCPUPercent(stats *runccgroup.Stats, previousCPU, now, previousSystem uint64) float64 {
var (
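
Both platform variants now fill in a caller-provided stats struct, so the locking, state checks, and nil normalization deleted above presumably move to a shared GetContainerStats wrapper. A sketch of that shape, reconstructed from the deleted lines (the actual common file is not part of this diff):

    // GetContainerStats gets the running stats for a given container.
    // Sketch only: reconstructed from the code removed in this hunk.
    func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*define.ContainerStats, error) {
        stats := new(define.ContainerStats)
        stats.ContainerID = c.ID()
        stats.Name = c.Name()

        if !c.batched {
            c.lock.Lock()
            defer c.lock.Unlock()
            if err := c.syncContainer(); err != nil {
                return stats, err
            }
        }

        // Return zero-valued stats for containers that are not running.
        if c.state.State != define.ContainerStateRunning && c.state.State != define.ContainerStatePaused {
            return stats, nil
        }

        if previousStats == nil {
            // With no previous sample, use the start time as the baseline
            // so CPU percentages still come out right.
            previousStats = &define.ContainerStats{
                SystemNano: uint64(c.state.StartedTime.UnixNano()),
            }
        }

        if err := c.getPlatformContainerStats(stats, previousStats); err != nil {
            return nil, err
        }
        return stats, nil
    }
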
diff --git a/libpod/stats_unsupported.go b/libpod/stats_unsupported.go
new file mode 100644
index 000000000..3094e2eaa
--- /dev/null
+++ b/libpod/stats_unsupported.go
@@ -0,0 +1,17 @@
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
+
+package libpod
+
+import (
+ "errors"
+
+ "github.com/containers/podman/v4/libpod/define"
+)
+
+// GetContainerStats gets the running stats for a given container.
+// The previousStats is used to correctly calculate cpu percentages. You
+// should pass nil if there is no previous stat for this container.
+func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*define.ContainerStats, error) {
+ return nil, errors.New("not implemented (*Container) GetContainerStats")
+}
diff --git a/libpod/util.go b/libpod/util.go
index a6e6a4f3e..c5a2b81bd 100644
--- a/libpod/util.go
+++ b/libpod/util.go
@@ -231,7 +231,7 @@ func DefaultSeccompPath() (string, error) {
func checkDependencyContainer(depCtr, ctr *Container) error {
state, err := depCtr.State()
if err != nil {
- return fmt.Errorf("error accessing dependency container %s state: %w", depCtr.ID(), err)
+ return fmt.Errorf("accessing dependency container %s state: %w", depCtr.ID(), err)
}
if state == define.ContainerStateRemoving {
return fmt.Errorf("cannot use container %s as a dependency as it is being removed: %w", depCtr.ID(), define.ErrCtrStateInvalid)
diff --git a/libpod/util_linux.go b/libpod/util_linux.go
index 7c79e6ce4..efc11710f 100644
--- a/libpod/util_linux.go
+++ b/libpod/util_linux.go
@@ -29,7 +29,7 @@ func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (
logrus.Debugf("Created cgroup path %s for parent %s and name %s", cgroupPath, parent, name)
if err := makeSystemdCgroup(cgroupPath, resources); err != nil {
- return "", fmt.Errorf("error creating cgroup %s: %w", cgroupPath, err)
+ return "", fmt.Errorf("creating cgroup %s: %w", cgroupPath, err)
}
logrus.Debugf("Created cgroup %s", cgroupPath)
@@ -112,17 +112,17 @@ var lvpReleaseLabel = label.ReleaseLabel
func LabelVolumePath(path string) error {
_, mountLabel, err := lvpInitLabels([]string{})
if err != nil {
- return fmt.Errorf("error getting default mountlabels: %w", err)
+ return fmt.Errorf("getting default mountlabels: %w", err)
}
if err := lvpReleaseLabel(mountLabel); err != nil {
- return fmt.Errorf("error releasing label %q: %w", mountLabel, err)
+ return fmt.Errorf("releasing label %q: %w", mountLabel, err)
}
if err := lvpRelabel(path, mountLabel, true); err != nil {
if err == syscall.ENOTSUP {
logrus.Debugf("Labeling not supported on %q", path)
} else {
- return fmt.Errorf("error setting selinux label for %s to %q as shared: %w", path, mountLabel, err)
+ return fmt.Errorf("setting selinux label for %s to %q as shared: %w", path, mountLabel, err)
}
}
return nil
diff --git a/libpod/util_unsupported.go b/libpod/util_unsupported.go
new file mode 100644
index 000000000..d2ec3ae7b
--- /dev/null
+++ b/libpod/util_unsupported.go
@@ -0,0 +1,27 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// systemdSliceFromPath makes a new systemd slice under the given parent with
+// the given name.
+// The parent must be a slice. The name must NOT include ".slice"
+func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (string, error) {
+ return "", errors.New("not implemented systemdSliceFromPath")
+}
+
+// Unmount unmounts a target directory
+func Unmount(mount string) {
+}
+
+// LabelVolumePath takes a mount path for a volume and gives it an
+// SELinux label, either shared or unshared
+func LabelVolumePath(path string) error {
+ return errors.New("not implemented LabelVolumePath")
+}
diff --git a/libpod/volume.go b/libpod/volume.go
index 2e8cd77a5..a054e4032 100644
--- a/libpod/volume.go
+++ b/libpod/volume.go
@@ -56,7 +56,7 @@ type VolumeConfig struct {
// quota tracking.
DisableQuota bool `json:"disableQuota,omitempty"`
// Timeout allows users to override the default driver timeout of 5 seconds
- Timeout int
+ Timeout *uint `json:"timeout,omitempty"`
}
// VolumeState holds the volume's mutable state.
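
Moving Timeout from int to *uint (with omitempty) distinguishes "no timeout configured" from an explicit 0, which the volume_inspect.go change below relies on when deciding whether to fall back to the engine default. A toy illustration with a stand-in struct (not the libpod type):

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // cfg is a stand-in for VolumeConfig; only the field's shape matters here.
    type cfg struct {
        Timeout *uint `json:"timeout,omitempty"`
    }

    func main() {
        zero := uint(0)
        unset, _ := json.Marshal(cfg{})                  // {}            -> use the engine default
        explicit, _ := json.Marshal(cfg{Timeout: &zero}) // {"timeout":0} -> the user really asked for 0
        fmt.Println(string(unset), string(explicit))
    }
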
diff --git a/libpod/volume_inspect.go b/libpod/volume_inspect.go
index dd2f3fd01..73441576b 100644
--- a/libpod/volume_inspect.go
+++ b/libpod/volume_inspect.go
@@ -39,7 +39,7 @@ func (v *Volume) Inspect() (*define.InspectVolumeData, error) {
req.Name = v.Name()
resp, err := v.plugin.GetVolume(req)
if err != nil {
- return nil, fmt.Errorf("error retrieving volume %s information from plugin %s: %w", v.Name(), v.Driver(), err)
+ return nil, fmt.Errorf("retrieving volume %s information from plugin %s: %w", v.Name(), v.Driver(), err)
}
if resp != nil {
data.Status = resp.Status
@@ -64,7 +64,12 @@ func (v *Volume) Inspect() (*define.InspectVolumeData, error) {
data.MountCount = v.state.MountCount
data.NeedsCopyUp = v.state.NeedsCopyUp
data.NeedsChown = v.state.NeedsChown
- data.Timeout = v.config.Timeout
+
+ if v.config.Timeout != nil {
+ data.Timeout = *v.config.Timeout
+ } else if v.UsesVolumeDriver() {
+ data.Timeout = v.runtime.config.Engine.VolumePluginTimeout
+ }
return data, nil
}
diff --git a/libpod/volume_internal_unsupported.go b/libpod/volume_internal_unsupported.go
new file mode 100644
index 000000000..50515e692
--- /dev/null
+++ b/libpod/volume_internal_unsupported.go
@@ -0,0 +1,32 @@
+//go:build !linux
+// +build !linux
+
+package libpod
+
+import (
+ "errors"
+)
+
+// mount mounts the volume if necessary.
+// A mount is necessary if a volume has any options set.
+// If a mount is necessary, v.state.MountCount will be incremented.
+// If it was 0 when the increment occurred, the volume will be mounted on the
+// host. Otherwise, we assume it is already mounted.
+// Must be done while the volume is locked.
+// Is a no-op on volumes that do not require a mount (as defined by
+// volumeNeedsMount()).
+func (v *Volume) mount() error {
+ return errors.New("not implemented (*Volume) mount")
+}
+
+// unmount unmounts the volume if necessary.
+// Unmounting a volume that is not mounted is a no-op.
+// Unmounting a volume that does not require a mount is a no-op.
+// The volume must be locked for this to occur.
+// The mount counter will be decremented if non-zero. If the counter reaches 0,
+// the volume will really be unmounted, as no further containers are using the
+// volume.
+// If force is set, the volume will be unmounted regardless of mount counter.
+func (v *Volume) unmount(force bool) error {
+ return errors.New("not implemented (*Volume) unmount")
+}
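
The doc comments on these stubs describe the reference-counted mount that the Linux implementation performs. A rough sketch of just that counting logic (fakeVolume and the doHost* helpers are hypothetical; the real code also runs the actual mount/unmount and persists state):

    type fakeVolume struct {
        mountCount uint
    }

    func (v *fakeVolume) doHostMount() error   { return nil } // stand-in for the real mount
    func (v *fakeVolume) doHostUnmount() error { return nil } // stand-in for the real unmount

    // mount bumps the counter and touches the host only on the 0 -> 1 transition.
    func (v *fakeVolume) mount() error {
        v.mountCount++
        if v.mountCount == 1 {
            return v.doHostMount()
        }
        return nil
    }

    // unmount decrements the counter and really unmounts only at zero;
    // force bypasses the counter entirely.
    func (v *fakeVolume) unmount(force bool) error {
        if force {
            v.mountCount = 0
            return v.doHostUnmount()
        }
        if v.mountCount == 0 {
            return nil // unmounting an unmounted volume is a no-op
        }
        v.mountCount--
        if v.mountCount == 0 {
            return v.doHostUnmount()
        }
        return nil
    }
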