diff options
author | Matthew Heon <mheon@redhat.com> | 2022-06-22 16:37:00 -0400 |
---|---|---|
committer | Matthew Heon <mheon@redhat.com> | 2022-06-23 10:44:38 -0400 |
commit | 3a810b8d2ad61a7bdca9b10e183ea7f6708474eb (patch) | |
tree | 0a59af988ece1f6d8d1b8dcff462ddaff113bf78 | |
parent | 30e7cbccc1942d4f8e3b4f489b9f33f30dec7233 (diff) | |
download | podman-3a810b8d2ad61a7bdca9b10e183ea7f6708474eb.tar.gz podman-3a810b8d2ad61a7bdca9b10e183ea7f6708474eb.tar.bz2 podman-3a810b8d2ad61a7bdca9b10e183ea7f6708474eb.zip |
Two fixes for DB exit code handling
Firstly: don't prune exit codes after a refresh - instead, clear
the table entirely. We are guaranteed that all containers are
gone after a refresh, we should not worry about exit codes given
this.
Secondly: alter the way pruning was done. We were updating the DB
by calling Update from within an existing View, and stacking an
RW transaction on top of an existing RO one seems dodgy; further,
modifying a bucket while iterating over it with ForEach is
undefined behavior.
Hopefully this will resolve our CI issues.
Signed-off-by: Matthew Heon <mheon@redhat.com>
-rw-r--r-- | libpod/boltdb_state.go | 141 |
1 files changed, 107 insertions, 34 deletions
diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index e5219ab8c..471f64b84 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -161,10 +161,6 @@ func (s *BoltState) Refresh() error { return define.ErrDBClosed } - if err := s.PruneContainerExitCodes(); err != nil { - return err - } - db, err := s.getDBCon() if err != nil { return err @@ -207,6 +203,45 @@ func (s *BoltState) Refresh() error { return err } + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + // Clear all exec exit codes + toRemoveExitCodes := []string{} + err = exitCodeBucket.ForEach(func(id, _ []byte) error { + toRemoveExitCodes = append(toRemoveExitCodes, string(id)) + return nil + }) + if err != nil { + return errors.Wrapf(err, "error reading exit codes bucket") + } + for _, id := range toRemoveExitCodes { + if err := exitCodeBucket.Delete([]byte(id)); err != nil { + return errors.Wrapf(err, "error removing exit code for ID %s", id) + } + } + + toRemoveTimeStamps := []string{} + err = timeStampBucket.ForEach(func(id, _ []byte) error { + toRemoveTimeStamps = append(toRemoveTimeStamps, string(id)) + return nil + }) + if err != nil { + return errors.Wrapf(err, "reading timestamps bucket") + } + for _, id := range toRemoveTimeStamps { + if err := timeStampBucket.Delete([]byte(id)); err != nil { + return errors.Wrapf(err, "removing timestamp for ID %s", id) + } + } + // Iterate through all IDs. Check if they are containers. // If they are, unmarshal their state, and then clear // PID, mountpoint, and state for all of them @@ -1358,6 +1393,14 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) { // AddContainerExitCode adds the exit code for the specified container to the database. func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error { + if len(id) == 0 { + return define.ErrEmptyID + } + + if !s.valid { + return define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return err @@ -1397,6 +1440,14 @@ func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error { // GetContainerExitCode returns the exit code for the specified container. func (s *BoltState) GetContainerExitCode(id string) (int32, error) { + if len(id) == 0 { + return -1, define.ErrEmptyID + } + + if !s.valid { + return -1, define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return -1, err @@ -1429,6 +1480,14 @@ func (s *BoltState) GetContainerExitCode(id string) (int32, error) { // GetContainerExitCodeTimeStamp returns the time stamp when the exit code of // the specified container was added to the database. func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) { + if len(id) == 0 { + return nil, define.ErrEmptyID + } + + if !s.valid { + return nil, define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return nil, err @@ -1456,16 +1515,22 @@ func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) }) } -// PrunExitCodes removes exit codes older than 5 minutes. +// PruneExitCodes removes exit codes older than 5 minutes. func (s *BoltState) PruneContainerExitCodes() error { + if !s.valid { + return define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return err } defer s.deferredCloseDBCon(db) + toRemoveIDs := []string{} + threshold := time.Minute * 5 - return db.View(func(tx *bolt.Tx) error { + err = db.View(func(tx *bolt.Tx) error { timeStampBucket, err := getExitCodeTimeStampBucket(tx) if err != nil { return err @@ -1477,43 +1542,51 @@ func (s *BoltState) PruneContainerExitCodes() error { return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err) } if time.Since(timeStamp) > threshold { - // Since the DB connection is locked, pass it - // to remove the exit codes to avoid race - // conditions. - return s.removeContainerExitCode(rawID, db) + toRemoveIDs = append(toRemoveIDs, string(rawID)) } return nil }) }) -} + if err != nil { + return errors.Wrapf(err, "reading exit codes to prune") + } -// removeContainerExitCode removes the exit code and time stamp of the specified container. -func (s *BoltState) removeContainerExitCode(rawID []byte, db *bolt.DB) error { - return db.Update(func(tx *bolt.Tx) error { - exitCodeBucket, err := getExitCodeBucket(tx) - if err != nil { - return err - } - timeStampBucket, err := getExitCodeTimeStampBucket(tx) - if err != nil { - return err - } + if len(toRemoveIDs) > 0 { + err = db.Update(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } - var finalErr error - if err := exitCodeBucket.Delete(rawID); err != nil { - finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", string(rawID), err) - } - if err := timeStampBucket.Delete(rawID); err != nil { - err = fmt.Errorf("removing exit-code time stamp of container %s from DB: %w", string(rawID), err) - if finalErr != nil { - logrus.Error(err) - } else { - finalErr = err + var finalErr error + for _, id := range toRemoveIDs { + rawID := []byte(id) + if err := exitCodeBucket.Delete(rawID); err != nil { + if finalErr != nil { + logrus.Error(finalErr) + } + finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", id, err) + } + if err := timeStampBucket.Delete(rawID); err != nil { + if finalErr != nil { + logrus.Error(finalErr) + } + finalErr = fmt.Errorf("removing exit code timestamp of container %s from DB: %w", id, err) + } } + + return finalErr + }) + if err != nil { + return errors.Wrapf(err, "pruning exit codes") } + } - return finalErr - }) + return nil } // AddExecSession adds an exec session to the state. |