From 8d3aec9d08bddf486dcb30e7113777b9d0cc27bd Mon Sep 17 00:00:00 2001 From: Jhon Honce Date: Tue, 5 Oct 2021 10:43:55 -0700 Subject: Enable /debug/pprof API service endpoints * Refactor sidecar HTTP service for /debug/pprof endpoints to use a TCP address given via new podman system service --pprof-address flag * Allow same URL parsing in "system service" as bindings/connection.go * Refactor NewServerWithSettings() to use entities.ServiceOptions in place of deleted server.Options * Updated godoc for impacted functions and types * Fixed API service Shutdown() to do an orderly shutdown when terminated and running with --time=0 Signed-off-by: Jhon Honce --- cmd/podman/system/service.go | 43 ++++++----- cmd/podman/system/service_abi.go | 61 +++++++++------ libpod/shutdown/handler.go | 4 +- pkg/api/server/server.go | 129 ++++++++++++++++--------------- pkg/domain/entities/system.go | 11 ++- test/e2e/system_service_test.go | 142 +++++++++++++++++++++++++++++++++++ test/system/271-tcp-cors-server.bats | 2 +- 7 files changed, 278 insertions(+), 114 deletions(-) create mode 100644 test/e2e/system_service_test.go diff --git a/cmd/podman/system/service.go b/cmd/podman/system/service.go index 99a6b1e1e..41d20d9fd 100644 --- a/cmd/podman/system/service.go +++ b/cmd/podman/system/service.go @@ -35,12 +35,14 @@ Enable a listening service for API access to Podman commands. Long: srvDescription, RunE: service, ValidArgsFunction: common.AutocompleteDefaultOneArg, - Example: `podman system service --time=0 unix:///tmp/podman.sock`, + Example: `podman system service --time=0 unix:///tmp/podman.sock + podman system service --time=0 tcp://localhost:8888`, } srvArgs = struct { - Timeout int64 CorsHeaders string + PProfAddr string + Timeout uint }{} ) @@ -51,15 +53,20 @@ func init() { }) flags := srvCmd.Flags() - cfg := registry.PodmanConfig() + timeFlagName := "time" - flags.Int64VarP(&srvArgs.Timeout, timeFlagName, "t", int64(cfg.Engine.ServiceTimeout), "Time until the service session expires in seconds. Use 0 to disable the timeout") + flags.UintVarP(&srvArgs.Timeout, timeFlagName, "t", cfg.Engine.ServiceTimeout, + "Time until the service session expires in seconds. Use 0 to disable the timeout") _ = srvCmd.RegisterFlagCompletionFunc(timeFlagName, completion.AutocompleteNone) + flags.SetNormalizeFunc(aliasTimeoutFlag) + flags.StringVarP(&srvArgs.CorsHeaders, "cors", "", "", "Set CORS Headers") _ = srvCmd.RegisterFlagCompletionFunc("cors", completion.AutocompleteNone) - flags.SetNormalizeFunc(aliasTimeoutFlag) + flags.StringVarP(&srvArgs.PProfAddr, "pprof-address", "", "", + "Binding network address for pprof profile endpoints, default: do not expose endpoints") + flags.MarkHidden("pprof-address") } func aliasTimeoutFlag(_ *pflag.FlagSet, name string) pflag.NormalizedName { @@ -74,7 +81,7 @@ func service(cmd *cobra.Command, args []string) error { if err != nil { return err } - logrus.Infof("Using API endpoint: '%s'", apiURI) + // Clean up any old existing unix domain socket if len(apiURI) > 0 { uri, err := url.Parse(apiURI) @@ -92,33 +99,31 @@ func service(cmd *cobra.Command, args []string) error { } } - opts := entities.ServiceOptions{ - URI: apiURI, - Command: cmd, + return restService(cmd.Flags(), registry.PodmanConfig(), entities.ServiceOptions{ CorsHeaders: srvArgs.CorsHeaders, - } - - opts.Timeout = time.Duration(srvArgs.Timeout) * time.Second - return restService(opts, cmd.Flags(), registry.PodmanConfig()) + PProfAddr: srvArgs.PProfAddr, + Timeout: time.Duration(srvArgs.Timeout) * time.Second, + URI: apiURI, + }) } -func resolveAPIURI(_url []string) (string, error) { +func resolveAPIURI(uri []string) (string, error) { // When determining _*THE*_ listening endpoint -- // 1) User input wins always // 2) systemd socket activation // 3) rootless honors XDG_RUNTIME_DIR // 4) lastly adapter.DefaultAPIAddress - if len(_url) == 0 { + if len(uri) == 0 { if v, found := os.LookupEnv("PODMAN_SOCKET"); found { - logrus.Debugf("PODMAN_SOCKET='%s' used to determine API endpoint", v) - _url = []string{v} + logrus.Debugf("PODMAN_SOCKET=%q used to determine API endpoint", v) + uri = []string{v} } } switch { - case len(_url) > 0 && _url[0] != "": - return _url[0], nil + case len(uri) > 0 && uri[0] != "": + return uri[0], nil case systemd.SocketActivated(): logrus.Info("Using systemd socket activation to determine API endpoint") return "", nil diff --git a/cmd/podman/system/service_abi.go b/cmd/podman/system/service_abi.go index e484db339..0a4be6aea 100644 --- a/cmd/podman/system/service_abi.go +++ b/cmd/podman/system/service_abi.go @@ -5,9 +5,9 @@ package system import ( "context" "net" + "net/url" "os" "path/filepath" - "strings" api "github.com/containers/podman/v3/pkg/api/server" "github.com/containers/podman/v3/pkg/domain/entities" @@ -20,41 +20,54 @@ import ( "golang.org/x/sys/unix" ) -func restService(opts entities.ServiceOptions, flags *pflag.FlagSet, cfg *entities.PodmanConfig) error { +func restService(flags *pflag.FlagSet, cfg *entities.PodmanConfig, opts entities.ServiceOptions) error { var ( listener *net.Listener err error ) if opts.URI != "" { - fields := strings.Split(opts.URI, ":") - if len(fields) == 1 { + uri, err := url.Parse(opts.URI) + if err != nil { return errors.Errorf("%s is an invalid socket destination", opts.URI) } - path := opts.URI - if fields[0] == "unix" { - if path, err = filepath.Abs(fields[1]); err != nil { - return err - } - } - util.SetSocketPath(path) - if os.Getenv("LISTEN_FDS") != "" { - // If it is activated by systemd, use the first LISTEN_FD (3) - // instead of opening the socket file. - f := os.NewFile(uintptr(3), "podman.sock") - l, err := net.FileListener(f) + + switch uri.Scheme { + case "unix": + path, err := filepath.Abs(uri.Path) if err != nil { return err } - listener = &l - } else { - network := fields[0] - address := strings.Join(fields[1:], ":") - l, err := net.Listen(network, address) + util.SetSocketPath(path) + if os.Getenv("LISTEN_FDS") != "" { + // If it is activated by systemd, use the first LISTEN_FD (3) + // instead of opening the socket file. + f := os.NewFile(uintptr(3), "podman.sock") + l, err := net.FileListener(f) + if err != nil { + return err + } + listener = &l + } else { + l, err := net.Listen(uri.Scheme, path) + if err != nil { + return errors.Wrapf(err, "unable to create socket") + } + listener = &l + } + case "tcp": + host := uri.Host + if host == "" { + // For backward compatibility, support "tcp::" and "tcp://:" + host = uri.Opaque + } + l, err := net.Listen(uri.Scheme, host) if err != nil { - return errors.Wrapf(err, "unable to create socket") + return errors.Wrapf(err, "unable to create socket %v", host) } listener = &l + default: + logrus.Debugf("Attempting API Service endpoint scheme %q", uri.Scheme) } } @@ -75,12 +88,12 @@ func restService(opts entities.ServiceOptions, flags *pflag.FlagSet, cfg *entiti servicereaper.Start() infra.StartWatcher(rt) - server, err := api.NewServerWithSettings(rt, listener, api.Options{Timeout: opts.Timeout, CorsHeaders: opts.CorsHeaders}) + server, err := api.NewServerWithSettings(rt, listener, opts) if err != nil { return err } defer func() { - if err := server.Shutdown(); err != nil { + if err := server.Shutdown(false); err != nil { logrus.Warnf("Error when stopping API service: %s", err) } }() diff --git a/libpod/shutdown/handler.go b/libpod/shutdown/handler.go index cca74c3c4..b0feafa0b 100644 --- a/libpod/shutdown/handler.go +++ b/libpod/shutdown/handler.go @@ -87,12 +87,12 @@ func Stop() error { return nil } -// Temporarily inhibit signals from shutting down Libpod. +// Inhibit temporarily inhibit signals from shutting down Libpod. func Inhibit() { shutdownInhibit.RLock() } -// Stop inhibiting signals from shutting down Libpod. +// Uninhibit stop inhibiting signals from shutting down Libpod. func Uninhibit() { shutdownInhibit.RUnlock() } diff --git a/pkg/api/server/server.go b/pkg/api/server/server.go index c7174775e..6e9578cd1 100644 --- a/pkg/api/server/server.go +++ b/pkg/api/server/server.go @@ -6,6 +6,7 @@ import ( "log" "net" "net/http" + "net/http/pprof" "os" "runtime" "strings" @@ -18,6 +19,7 @@ import ( "github.com/containers/podman/v3/pkg/api/handlers" "github.com/containers/podman/v3/pkg/api/server/idle" "github.com/containers/podman/v3/pkg/api/types" + "github.com/containers/podman/v3/pkg/domain/entities" "github.com/coreos/go-systemd/v22/activation" "github.com/coreos/go-systemd/v22/daemon" "github.com/gorilla/mux" @@ -27,14 +29,14 @@ import ( type APIServer struct { http.Server // The HTTP work happens here - *schema.Decoder // Decoder for Query parameters to structs - context.Context // Context to carry objects to handlers - *libpod.Runtime // Where the real work happens net.Listener // mux for routing HTTP API calls to libpod routines + *libpod.Runtime // Where the real work happens + *schema.Decoder // Decoder for Query parameters to structs context.CancelFunc // Stop APIServer + context.Context // Context to carry objects to handlers + CorsHeaders string // Inject Cross-Origin Resource Sharing (CORS) headers + PProfAddr string // Binding network address for pprof profiles idleTracker *idle.Tracker // Track connections to support idle shutdown - pprof *http.Server // Sidecar http server for providing performance data - CorsHeaders string // Inject CORS headers to each request } // Number of seconds to wait for next request, if exceeded shutdown server @@ -49,22 +51,20 @@ var ( shutdownOnce sync.Once ) -type Options struct { - Timeout time.Duration - CorsHeaders string -} - // NewServer will create and configure a new API server with all defaults func NewServer(runtime *libpod.Runtime) (*APIServer, error) { - return newServer(runtime, DefaultServiceDuration, nil, DefaultCorsHeaders) + return newServer(runtime, nil, entities.ServiceOptions{ + CorsHeaders: DefaultCorsHeaders, + Timeout: DefaultServiceDuration, + }) } // NewServerWithSettings will create and configure a new API server using provided settings -func NewServerWithSettings(runtime *libpod.Runtime, listener *net.Listener, opts Options) (*APIServer, error) { - return newServer(runtime, opts.Timeout, listener, opts.CorsHeaders) +func NewServerWithSettings(runtime *libpod.Runtime, listener *net.Listener, opts entities.ServiceOptions) (*APIServer, error) { + return newServer(runtime, listener, opts) } -func newServer(runtime *libpod.Runtime, duration time.Duration, listener *net.Listener, corsHeaders string) (*APIServer, error) { +func newServer(runtime *libpod.Runtime, listener *net.Listener, opts entities.ServiceOptions) (*APIServer, error) { // If listener not provided try socket activation protocol if listener == nil { if _, found := os.LookupEnv("LISTEN_PID"); !found { @@ -80,15 +80,15 @@ func newServer(runtime *libpod.Runtime, duration time.Duration, listener *net.Li } listener = &listeners[0] } - if corsHeaders == "" { + if opts.CorsHeaders == "" { logrus.Debug("CORS Headers were not set") } else { - logrus.Debugf("CORS Headers were set to %s", corsHeaders) + logrus.Debugf("CORS Headers were set to %q", opts.CorsHeaders) } logrus.Infof("API service listening on %q", (*listener).Addr()) router := mux.NewRouter().UseEncodedPath() - tracker := idle.NewTracker(duration) + tracker := idle.NewTracker(opts.Timeout) server := APIServer{ Server: http.Server{ @@ -98,10 +98,11 @@ func newServer(runtime *libpod.Runtime, duration time.Duration, listener *net.Li ConnState: tracker.ConnState, ErrorLog: log.New(logrus.StandardLogger().Out, "", 0), Handler: router, - IdleTimeout: duration * 2, + IdleTimeout: opts.Timeout * 2, }, - CorsHeaders: corsHeaders, + CorsHeaders: opts.CorsHeaders, Listener: *listener, + PProfAddr: opts.PProfAddr, idleTracker: tracker, } @@ -181,18 +182,18 @@ func newServer(runtime *libpod.Runtime, duration time.Duration, listener *net.Li return &server, nil } -// If the NOTIFY_SOCKET is set, communicate the PID and readiness, and -// further unset NOTIFY_SOCKET to prevent containers from sending -// messages and unset INVOCATION_ID so conmon and containers are in -// the correct cgroup. -func setupSystemd() { - if len(os.Getenv("NOTIFY_SOCKET")) == 0 { +// setupSystemd notifies systemd API service is ready +// If the NOTIFY_SOCKET is set, communicate the PID and readiness, and unset INVOCATION_ID +// so conmon and containers are in the correct cgroup. +func (s *APIServer) setupSystemd() { + if _, found := os.LookupEnv("NOTIFY_SOCKET"); !found { return } + payload := fmt.Sprintf("MAINPID=%d\n", os.Getpid()) payload += daemon.SdNotifyReady if sent, err := daemon.SdNotify(true, payload); err != nil { - logrus.Error("API service error notifying systemd of Conmon PID: " + err.Error()) + logrus.Error("API service failed to notify systemd of Conmon PID: " + err.Error()) } else if !sent { logrus.Warn("API service unable to successfully send SDNotify") } @@ -204,10 +205,10 @@ func setupSystemd() { // Serve starts responding to HTTP requests. func (s *APIServer) Serve() error { - setupSystemd() + s.setupPprof() if err := shutdown.Register("server", func(sig os.Signal) error { - return s.Shutdown() + return s.Shutdown(true) }); err != nil { return err } @@ -216,32 +217,17 @@ func (s *APIServer) Serve() error { return err } - errChan := make(chan error, 1) - go func() { <-s.idleTracker.Done() - logrus.Debug("API service shutting down, idle for " + s.idleTracker.Duration.Round(time.Second).String()) - _ = s.Shutdown() + logrus.Debugf("API service(s) shutting down, idle for %ds", int(s.idleTracker.Duration.Seconds())) + _ = s.Shutdown(false) }() - if logrus.IsLevelEnabled(logrus.DebugLevel) { - go func() { - pprofMux := mux.NewRouter() - pprofMux.PathPrefix("/debug/pprof").Handler(http.DefaultServeMux) - runtime.SetMutexProfileFraction(1) - runtime.SetBlockProfileRate(1) - s.pprof = &http.Server{Addr: "localhost:8888", Handler: pprofMux} - err := s.pprof.ListenAndServe() - if err != nil && err != http.ErrServerClosed { - logrus.Warnf("API profiler service failed: %v", err) - } - }() - } - - // Before we start serving, ensure umask is properly set for container - // creation. + // Before we start serving, ensure umask is properly set for container creation. _ = syscall.Umask(0022) + errChan := make(chan error, 1) + s.setupSystemd() go func() { err := s.Server.Serve(s.Listener) if err != nil && err != http.ErrServerClosed { @@ -254,10 +240,40 @@ func (s *APIServer) Serve() error { return <-errChan } +// setupPprof enables pprof default endpoints +// Note: These endpoints and the podman flag --cpu-profile are mutually exclusive +// +// Examples: +// #1 go tool pprof -http localhost:8889 localhost:8888/debug/pprof/heap?seconds=120 +// Note: web page will only render after a sample has been recorded +// #2 curl http://localhost:8888/debug/pprof/heap > heap.pprof && go tool pprof heap.pprof +func (s *APIServer) setupPprof() { + if s.PProfAddr == "" { + return + } + + logrus.Infof("pprof service listening on %q", s.PProfAddr) + go func() { + old := runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(old) + + runtime.SetBlockProfileRate(1) + defer runtime.SetBlockProfileRate(0) + + router := mux.NewRouter() + router.PathPrefix("/debug/pprof/").HandlerFunc(pprof.Index) + + err := http.ListenAndServe(s.PProfAddr, router) + if err != nil && err != http.ErrServerClosed { + logrus.Warnf("pprof service failed: %v", err) + } + }() +} + // Shutdown is a clean shutdown waiting on existing clients -func (s *APIServer) Shutdown() error { - if s.idleTracker.Duration == UnlimitedServiceDuration { - logrus.Debug("API service shutdown ignored as Duration is UnlimitedService") +func (s *APIServer) Shutdown(halt bool) error { + if s.idleTracker.Duration == UnlimitedServiceDuration && !halt { + logrus.Debug("API service shutdown request ignored as Duration is UnlimitedService") return nil } @@ -266,17 +282,6 @@ func (s *APIServer) Shutdown() error { _, file, line, _ := runtime.Caller(1) logrus.Debugf("API service shutdown by %s:%d, %d/%d connection(s)", file, line, s.idleTracker.ActiveConnections(), s.idleTracker.TotalConnections()) - - go func() { - ctx, cancel := context.WithTimeout(context.Background(), s.idleTracker.Duration) - go func() { - defer cancel() - if err := s.pprof.Shutdown(ctx); err != nil { - logrus.Warnf("Failed to cleanly shutdown API pprof service: %v", err) - } - }() - <-ctx.Done() - }() } // Gracefully shutdown server(s), duration of wait same as idle window diff --git a/pkg/domain/entities/system.go b/pkg/domain/entities/system.go index cca4bf44e..fe041dec8 100644 --- a/pkg/domain/entities/system.go +++ b/pkg/domain/entities/system.go @@ -6,15 +6,14 @@ import ( "github.com/containers/podman/v3/libpod/define" "github.com/containers/podman/v3/pkg/domain/entities/reports" "github.com/containers/podman/v3/pkg/domain/entities/types" - "github.com/spf13/cobra" ) -// ServiceOptions provides the input for starting an API Service +// ServiceOptions provides the input for starting an API and sidecar pprof services type ServiceOptions struct { - URI string // Path to unix domain socket service should listen on - Timeout time.Duration // duration of inactivity the service should wait before shutting down - Command *cobra.Command // CLI command provided. Used in V1 code - CorsHeaders string // CORS headers + CorsHeaders string // Cross-Origin Resource Sharing (CORS) headers + PProfAddr string // Network address to bind pprof profiles service + Timeout time.Duration // Duration of inactivity the service should wait before shutting down + URI string // Path to unix domain socket service should listen on } // SystemPruneOptions provides options to prune system. diff --git a/test/e2e/system_service_test.go b/test/e2e/system_service_test.go new file mode 100644 index 000000000..684ac56b4 --- /dev/null +++ b/test/e2e/system_service_test.go @@ -0,0 +1,142 @@ +package integration + +import ( + "io/ioutil" + "net" + "net/http" + "net/url" + "strconv" + "time" + + . "github.com/containers/podman/v3/test/utils" + "github.com/containers/podman/v3/utils" + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + . "github.com/onsi/gomega/gexec" +) + +var _ = Describe("podman system service", func() { + var podmanTest *PodmanTestIntegration + + BeforeEach(func() { + tempdir, err := CreateTempDirInTempDir() + Expect(err).ShouldNot(HaveOccurred()) + + podmanTest = PodmanTestCreate(tempdir) + podmanTest.Setup() + }) + + AfterEach(func() { + podmanTest.Cleanup() + processTestResult(CurrentGinkgoTestDescription()) + }) + + Describe("verify timeout", func() { + It("of 2 seconds", func() { + SkipIfRemote("service subcommand not supported remotely") + + address := url.URL{ + Scheme: "tcp", + Host: net.JoinHostPort("localhost", randomPort()), + } + session := podmanTest.Podman([]string{ + "system", "service", "--time=2", address.String(), + }) + defer session.Kill() + + WaitForService(address) + + session.Wait(5 * time.Second) + Eventually(session, 5).Should(Exit(0)) + }) + }) + + Describe("verify pprof endpoints", func() { + // Depends on pkg/api/server/server.go:255 + const magicComment = "pprof service listening on" + + It("are available", func() { + SkipIfRemote("service subcommand not supported remotely") + + address := url.URL{ + Scheme: "tcp", + Host: net.JoinHostPort("localhost", randomPort()), + } + + pprofPort := randomPort() + session := podmanTest.Podman([]string{ + "system", "service", "--log-level=info", "--time=0", + "--pprof-address=localhost:" + pprofPort, address.String(), + }) + defer session.Kill() + + WaitForService(address) + + // Combined with test below we have positive/negative test for pprof + Expect(session.Err.Contents()).Should(ContainSubstring(magicComment)) + + heap := url.URL{ + Scheme: "http", + Host: net.JoinHostPort("localhost", pprofPort), + Path: "/debug/pprof/heap", + RawQuery: "seconds=2", + } + resp, err := http.Get(heap.String()) + Expect(err).ShouldNot(HaveOccurred()) + defer resp.Body.Close() + Expect(resp).To(HaveHTTPStatus(http.StatusOK)) + + body, err := ioutil.ReadAll(resp.Body) + Expect(err).ShouldNot(HaveOccurred()) + Expect(body).ShouldNot(BeEmpty()) + + session.Interrupt().Wait(2 * time.Second) + Eventually(session, 2).Should(Exit(1)) + }) + + It("are not available", func() { + SkipIfRemote("service subcommand not supported remotely") + + address := url.URL{ + Scheme: "tcp", + Host: net.JoinHostPort("localhost", randomPort()), + } + + session := podmanTest.Podman([]string{ + "system", "service", "--log-level=info", "--time=0", address.String(), + }) + defer session.Kill() + + WaitForService(address) + + // Combined with test above we have positive/negative test for pprof + Expect(session.Err.Contents()).ShouldNot(ContainSubstring(magicComment)) + + session.Interrupt().Wait(2 * time.Second) + Eventually(session, 2).Should(Exit(1)) + }) + }) +}) + +// WaitForService blocks, waiting for some service listening on given host:port +func WaitForService(address url.URL) { + // Wait for podman to be ready + var conn net.Conn + var err error + for i := 1; i <= 5; i++ { + conn, err = net.Dial("tcp", address.Host) + if err != nil { + // Podman not available yet... + time.Sleep(time.Duration(i) * time.Second) + } + } + Expect(err).ShouldNot(HaveOccurred()) + conn.Close() +} + +// randomPort leans on the go net library to find an available port... +func randomPort() string { + port, err := utils.GetRandomPort() + Expect(err).ShouldNot(HaveOccurred()) + return strconv.Itoa(port) +} diff --git a/test/system/271-tcp-cors-server.bats b/test/system/271-tcp-cors-server.bats index d8e4eb3df..be77afd40 100644 --- a/test/system/271-tcp-cors-server.bats +++ b/test/system/271-tcp-cors-server.bats @@ -38,7 +38,7 @@ SOCKET_FILE="$UNIT_DIR/$SERVICE_NAME.socket" @test "podman system service - CORS enabled in logs" { skip_if_remote "system service tests are meaningless over remote" run_podman system service --log-level="debug" --cors="*" -t 1 - is "$output" ".*CORS Headers were set to \*.*" "debug log confirms CORS headers set" + is "$output" ".*CORS Headers were set to ..\*...*" "debug log confirms CORS headers set" } # vim: filetype=sh -- cgit v1.2.3-54-g00ecf