vendor buildah@main

Note that the bud-logfile-with-split-logfile-by-platform test is skipped on the remote client (see #14544). Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
author: Valentin Rothberg <vrothberg@redhat.com> 2022-06-08 11:55:13 +0200
committer: Valentin Rothberg <vrothberg@redhat.com> 2022-06-10 09:42:19 +0200
commit: 46c8da7d9acd6011f318ce8fa9c38591888654f0 (patch)
tree: 54e097a523e9d67eb97b65e963a7c46bf2f7973d /vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go
parent: 9f1bd0a0a1494f46a49ca7f22511c5a78006afd8 (diff)
download: podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.tar.gz
podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.tar.bz2
podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.zip
1 files changed, 179 insertions, 9 deletions
diff --git a/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go b/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go
index 75499c967..1d45a703b 100644
--- a/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go
+++ b/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go
@@ -4,17 +4,22 @@ import (
 	"context"
 	"encoding/json"
 	"errors"
+	"fmt"
 	"strings"
 	"sync"
 	"syscall"
+	"time"
 
 	"github.com/Microsoft/hcsshim/internal/cow"
 	"github.com/Microsoft/hcsshim/internal/hcs/schema1"
 	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
+	"github.com/Microsoft/hcsshim/internal/jobobject"
 	"github.com/Microsoft/hcsshim/internal/log"
+	"github.com/Microsoft/hcsshim/internal/logfields"
 	"github.com/Microsoft/hcsshim/internal/oc"
 	"github.com/Microsoft/hcsshim/internal/timeout"
 	"github.com/Microsoft/hcsshim/internal/vmcompute"
+	"github.com/sirupsen/logrus"
 	"go.opencensus.io/trace"
 )
 
@@ -28,7 +33,8 @@ type System struct {
 	waitBlock      chan struct{}
 	waitError      error
 	exitError      error
-	os, typ        string
+	os, typ, owner string
+	startTime      time.Time
 }
 
 func newSystem(id string) *System {
@@ -38,6 +44,11 @@ func newSystem(id string) *System {
 	}
 }
 
+// Implementation detail for silo naming, this should NOT be relied upon very heavily.
+func siloNameFmt(containerID string) string {
+	return fmt.Sprintf(`\Container_%s`, containerID)
+}
+
 // CreateComputeSystem creates a new compute system with the given configuration but does not start it.
 func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface interface{}) (_ *System, err error) {
 	operation := "hcs::CreateComputeSystem"
@@ -127,6 +138,7 @@ func (computeSystem *System) getCachedProperties(ctx context.Context) error {
 	}
 	computeSystem.typ = strings.ToLower(props.SystemType)
 	computeSystem.os = strings.ToLower(props.RuntimeOSType)
+	computeSystem.owner = strings.ToLower(props.Owner)
 	if computeSystem.os == "" && computeSystem.typ == "container" {
 		// Pre-RS5 HCS did not return the OS, but it only supported containers
 		// that ran Windows.
@@ -195,7 +207,7 @@ func (computeSystem *System) Start(ctx context.Context) (err error) {
 	if err != nil {
 		return makeSystemError(computeSystem, operation, err, events)
 	}
-
+	computeSystem.startTime = time.Now()
 	return nil
 }
 
@@ -324,11 +336,115 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr
 	return properties, nil
 }
 
-// PropertiesV2 returns the requested container properties targeting a V2 schema container.
-func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) {
-	computeSystem.handleLock.RLock()
-	defer computeSystem.handleLock.RUnlock()
+// queryInProc handles querying for container properties without reaching out to HCS. `props`
+// will be updated to contain any data returned from the queries present in `types`. If any properties
+// failed to be queried they will be tallied up and returned in as the first return value. Failures on
+// query are NOT considered errors; the only failure case for this method is if the containers job object
+// cannot be opened.
+func (computeSystem *System) queryInProc(ctx context.Context, props *hcsschema.Properties, types []hcsschema.PropertyType) ([]hcsschema.PropertyType, error) {
+	// In the future we can make use of some new functionality in the HCS that allows you
+	// to pass a job object for HCS to use for the container. Currently, the only way we'll
+	// be able to open the job/silo is if we're running as SYSTEM.
+	jobOptions := &jobobject.Options{
+		UseNTVariant: true,
+		Name:         siloNameFmt(computeSystem.id),
+	}
+	job, err := jobobject.Open(ctx, jobOptions)
+	if err != nil {
+		return nil, err
+	}
+	defer job.Close()
+
+	var fallbackQueryTypes []hcsschema.PropertyType
+	for _, propType := range types {
+		switch propType {
+		case hcsschema.PTStatistics:
+			// Handle a bad caller asking for the same type twice. No use in re-querying if this is
+			// filled in already.
+			if props.Statistics == nil {
+				props.Statistics, err = computeSystem.statisticsInProc(job)
+				if err != nil {
+					log.G(ctx).WithError(err).Warn("failed to get statistics in-proc")
+
+					fallbackQueryTypes = append(fallbackQueryTypes, propType)
+				}
+			}
+		default:
+			fallbackQueryTypes = append(fallbackQueryTypes, propType)
+		}
+	}
+
+	return fallbackQueryTypes, nil
+}
+
+// statisticsInProc emulates what HCS does to grab statistics for a given container with a small
+// change to make grabbing the private working set total much more efficient.
+func (computeSystem *System) statisticsInProc(job *jobobject.JobObject) (*hcsschema.Statistics, error) {
+	// Start timestamp for these stats before we grab them to match HCS
+	timestamp := time.Now()
+
+	memInfo, err := job.QueryMemoryStats()
+	if err != nil {
+		return nil, err
+	}
+
+	processorInfo, err := job.QueryProcessorStats()
+	if err != nil {
+		return nil, err
+	}
+
+	storageInfo, err := job.QueryStorageStats()
+	if err != nil {
+		return nil, err
+	}
+
+	// This calculates the private working set more efficiently than HCS does. HCS calls NtQuerySystemInformation
+	// with the class SystemProcessInformation which returns an array containing system information for *every*
+	// process running on the machine. They then grab the pids that are running in the container and filter down
+	// the entries in the array to only what's running in that silo and start tallying up the total. This doesn't
+	// work well as performance should get worse if more processess are running on the machine in general and not
+	// just in the container. All of the additional information besides the WorkingSetPrivateSize field is ignored
+	// as well which isn't great and is wasted work to fetch.
+	//
+	// HCS only let's you grab statistics in an all or nothing fashion, so we can't just grab the private
+	// working set ourselves and ask for everything else seperately. The optimization we can make here is
+	// to open the silo ourselves and do the same queries for the rest of the info, as well as calculating
+	// the private working set in a more efficient manner by:
+	//
+	// 1. Find the pids running in the silo
+	// 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access)
+	// 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters
+	// 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2.
+	privateWorkingSet, err := job.QueryPrivateWorkingSet()
+	if err != nil {
+		return nil, err
+	}
 
+	return &hcsschema.Statistics{
+		Timestamp:          timestamp,
+		ContainerStartTime: computeSystem.startTime,
+		Uptime100ns:        uint64(time.Since(computeSystem.startTime).Nanoseconds()) / 100,
+		Memory: &hcsschema.MemoryStats{
+			MemoryUsageCommitBytes:            memInfo.JobMemory,
+			MemoryUsageCommitPeakBytes:        memInfo.PeakJobMemoryUsed,
+			MemoryUsagePrivateWorkingSetBytes: privateWorkingSet,
+		},
+		Processor: &hcsschema.ProcessorStats{
+			RuntimeKernel100ns: uint64(processorInfo.TotalKernelTime),
+			RuntimeUser100ns:   uint64(processorInfo.TotalUserTime),
+			TotalRuntime100ns:  uint64(processorInfo.TotalKernelTime + processorInfo.TotalUserTime),
+		},
+		Storage: &hcsschema.StorageStats{
+			ReadCountNormalized:  uint64(storageInfo.ReadStats.IoCount),
+			ReadSizeBytes:        storageInfo.ReadStats.TotalSize,
+			WriteCountNormalized: uint64(storageInfo.WriteStats.IoCount),
+			WriteSizeBytes:       storageInfo.WriteStats.TotalSize,
+		},
+	}, nil
+}
+
+// hcsPropertiesV2Query is a helper to make a HcsGetComputeSystemProperties call using the V2 schema property types.
+func (computeSystem *System) hcsPropertiesV2Query(ctx context.Context, types []hcsschema.PropertyType) (*hcsschema.Properties, error) {
 	operation := "hcs::System::PropertiesV2"
 
 	queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types})
@@ -345,12 +461,66 @@ func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschem
 	if propertiesJSON == "" {
 		return nil, ErrUnexpectedValue
 	}
-	properties := &hcsschema.Properties{}
-	if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil {
+	props := &hcsschema.Properties{}
+	if err := json.Unmarshal([]byte(propertiesJSON), props); err != nil {
 		return nil, makeSystemError(computeSystem, operation, err, nil)
 	}
 
-	return properties, nil
+	return props, nil
+}
+
+// PropertiesV2 returns the requested compute systems properties targeting a V2 schema compute system.
+func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (_ *hcsschema.Properties, err error) {
+	computeSystem.handleLock.RLock()
+	defer computeSystem.handleLock.RUnlock()
+
+	// Let HCS tally up the total for VM based queries instead of querying ourselves.
+	if computeSystem.typ != "container" {
+		return computeSystem.hcsPropertiesV2Query(ctx, types)
+	}
+
+	// Define a starter Properties struct with the default fields returned from every
+	// query. Owner is only returned from Statistics but it's harmless to include.
+	properties := &hcsschema.Properties{
+		Id:            computeSystem.id,
+		SystemType:    computeSystem.typ,
+		RuntimeOsType: computeSystem.os,
+		Owner:         computeSystem.owner,
+	}
+
+	logEntry := log.G(ctx)
+	// First lets try and query ourselves without reaching to HCS. If any of the queries fail
+	// we'll take note and fallback to querying HCS for any of the failed types.
+	fallbackTypes, err := computeSystem.queryInProc(ctx, properties, types)
+	if err == nil && len(fallbackTypes) == 0 {
+		return properties, nil
+	} else if err != nil {
+		logEntry.WithError(fmt.Errorf("failed to query compute system properties in-proc: %w", err))
+		fallbackTypes = types
+	}
+
+	logEntry.WithFields(logrus.Fields{
+		logfields.ContainerID: computeSystem.id,
+		"propertyTypes":       fallbackTypes,
+	}).Info("falling back to HCS for property type queries")
+
+	hcsProperties, err := computeSystem.hcsPropertiesV2Query(ctx, fallbackTypes)
+	if err != nil {
+		return nil, err
+	}
+
+	// Now add in anything that we might have successfully queried in process.
+	if properties.Statistics != nil {
+		hcsProperties.Statistics = properties.Statistics
+		hcsProperties.Owner = properties.Owner
+	}
+
+	// For future support for querying processlist in-proc as well.
+	if properties.ProcessList != nil {
+		hcsProperties.ProcessList = properties.ProcessList
+	}
+
+	return hcsProperties, nil
 }
 
 // Pause pauses the execution of the computeSystem. This feature is not enabled in TP5.
author	Valentin Rothberg <vrothberg@redhat.com>	2022-06-08 11:55:13 +0200
committer	Valentin Rothberg <vrothberg@redhat.com>	2022-06-10 09:42:19 +0200
commit	46c8da7d9acd6011f318ce8fa9c38591888654f0 (patch)
tree	54e097a523e9d67eb97b65e963a7c46bf2f7973d /vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go
parent	9f1bd0a0a1494f46a49ca7f22511c5a78006afd8 (diff)
download	podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.tar.gz podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.tar.bz2 podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.zip