From 3a810b8d2ad61a7bdca9b10e183ea7f6708474eb Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Wed, 22 Jun 2022 16:37:00 -0400 Subject: Two fixes for DB exit code handling Firstly: don't prune exit codes after a refresh - instead, clear the table entirely. We are guaranteed that all containers are gone after a refresh, we should not worry about exit codes given this. Secondly: alter the way pruning was done. We were updating the DB by calling Update from within an existing View, and stacking an RW transaction on top of an existing RO one seems dodgy; further, modifying a bucket while iterating over it with ForEach is undefined behavior. Hopefully this will resolve our CI issues. Signed-off-by: Matthew Heon --- libpod/boltdb_state.go | 141 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 107 insertions(+), 34 deletions(-) diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index e5219ab8c..471f64b84 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -161,10 +161,6 @@ func (s *BoltState) Refresh() error { return define.ErrDBClosed } - if err := s.PruneContainerExitCodes(); err != nil { - return err - } - db, err := s.getDBCon() if err != nil { return err @@ -207,6 +203,45 @@ func (s *BoltState) Refresh() error { return err } + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + // Clear all exec exit codes + toRemoveExitCodes := []string{} + err = exitCodeBucket.ForEach(func(id, _ []byte) error { + toRemoveExitCodes = append(toRemoveExitCodes, string(id)) + return nil + }) + if err != nil { + return errors.Wrapf(err, "error reading exit codes bucket") + } + for _, id := range toRemoveExitCodes { + if err := exitCodeBucket.Delete([]byte(id)); err != nil { + return errors.Wrapf(err, "error removing exit code for ID %s", id) + } + } + + toRemoveTimeStamps := []string{} + err = timeStampBucket.ForEach(func(id, _ []byte) error { + toRemoveTimeStamps = append(toRemoveTimeStamps, string(id)) + return nil + }) + if err != nil { + return errors.Wrapf(err, "reading timestamps bucket") + } + for _, id := range toRemoveTimeStamps { + if err := timeStampBucket.Delete([]byte(id)); err != nil { + return errors.Wrapf(err, "removing timestamp for ID %s", id) + } + } + // Iterate through all IDs. Check if they are containers. // If they are, unmarshal their state, and then clear // PID, mountpoint, and state for all of them @@ -1358,6 +1393,14 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) { // AddContainerExitCode adds the exit code for the specified container to the database. func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error { + if len(id) == 0 { + return define.ErrEmptyID + } + + if !s.valid { + return define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return err @@ -1397,6 +1440,14 @@ func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error { // GetContainerExitCode returns the exit code for the specified container. func (s *BoltState) GetContainerExitCode(id string) (int32, error) { + if len(id) == 0 { + return -1, define.ErrEmptyID + } + + if !s.valid { + return -1, define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return -1, err @@ -1429,6 +1480,14 @@ func (s *BoltState) GetContainerExitCode(id string) (int32, error) { // GetContainerExitCodeTimeStamp returns the time stamp when the exit code of // the specified container was added to the database. func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) { + if len(id) == 0 { + return nil, define.ErrEmptyID + } + + if !s.valid { + return nil, define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return nil, err @@ -1456,16 +1515,22 @@ func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) }) } -// PrunExitCodes removes exit codes older than 5 minutes. +// PruneExitCodes removes exit codes older than 5 minutes. func (s *BoltState) PruneContainerExitCodes() error { + if !s.valid { + return define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return err } defer s.deferredCloseDBCon(db) + toRemoveIDs := []string{} + threshold := time.Minute * 5 - return db.View(func(tx *bolt.Tx) error { + err = db.View(func(tx *bolt.Tx) error { timeStampBucket, err := getExitCodeTimeStampBucket(tx) if err != nil { return err @@ -1477,43 +1542,51 @@ func (s *BoltState) PruneContainerExitCodes() error { return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err) } if time.Since(timeStamp) > threshold { - // Since the DB connection is locked, pass it - // to remove the exit codes to avoid race - // conditions. - return s.removeContainerExitCode(rawID, db) + toRemoveIDs = append(toRemoveIDs, string(rawID)) } return nil }) }) -} + if err != nil { + return errors.Wrapf(err, "reading exit codes to prune") + } -// removeContainerExitCode removes the exit code and time stamp of the specified container. -func (s *BoltState) removeContainerExitCode(rawID []byte, db *bolt.DB) error { - return db.Update(func(tx *bolt.Tx) error { - exitCodeBucket, err := getExitCodeBucket(tx) - if err != nil { - return err - } - timeStampBucket, err := getExitCodeTimeStampBucket(tx) - if err != nil { - return err - } + if len(toRemoveIDs) > 0 { + err = db.Update(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } - var finalErr error - if err := exitCodeBucket.Delete(rawID); err != nil { - finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", string(rawID), err) - } - if err := timeStampBucket.Delete(rawID); err != nil { - err = fmt.Errorf("removing exit-code time stamp of container %s from DB: %w", string(rawID), err) - if finalErr != nil { - logrus.Error(err) - } else { - finalErr = err + var finalErr error + for _, id := range toRemoveIDs { + rawID := []byte(id) + if err := exitCodeBucket.Delete(rawID); err != nil { + if finalErr != nil { + logrus.Error(finalErr) + } + finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", id, err) + } + if err := timeStampBucket.Delete(rawID); err != nil { + if finalErr != nil { + logrus.Error(finalErr) + } + finalErr = fmt.Errorf("removing exit code timestamp of container %s from DB: %w", id, err) + } } + + return finalErr + }) + if err != nil { + return errors.Wrapf(err, "pruning exit codes") } + } - return finalErr - }) + return nil } // AddExecSession adds an exec session to the state. -- cgit v1.2.3-54-g00ecf