diff options
author | Valentin Rothberg <vrothberg@redhat.com> | 2022-06-08 11:55:13 +0200 |
---|---|---|
committer | Valentin Rothberg <vrothberg@redhat.com> | 2022-06-10 09:42:19 +0200 |
commit | 46c8da7d9acd6011f318ce8fa9c38591888654f0 (patch) | |
tree | 54e097a523e9d67eb97b65e963a7c46bf2f7973d /vendor/github.com/Microsoft/hcsshim/internal/hcs | |
parent | 9f1bd0a0a1494f46a49ca7f22511c5a78006afd8 (diff) | |
download | podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.tar.gz podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.tar.bz2 podman-46c8da7d9acd6011f318ce8fa9c38591888654f0.zip |
vendor buildah@main
Note that the bud-logfile-with-split-logfile-by-platform test is skipped
on the remote client (see #14544).
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
Diffstat (limited to 'vendor/github.com/Microsoft/hcsshim/internal/hcs')
-rw-r--r-- | vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go | 188 |
1 files changed, 179 insertions, 9 deletions
diff --git a/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go b/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go index 75499c967..1d45a703b 100644 --- a/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go +++ b/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go @@ -4,17 +4,22 @@ import ( "context" "encoding/json" "errors" + "fmt" "strings" "sync" "syscall" + "time" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/jobobject" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/timeout" "github.com/Microsoft/hcsshim/internal/vmcompute" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -28,7 +33,8 @@ type System struct { waitBlock chan struct{} waitError error exitError error - os, typ string + os, typ, owner string + startTime time.Time } func newSystem(id string) *System { @@ -38,6 +44,11 @@ func newSystem(id string) *System { } } +// Implementation detail for silo naming, this should NOT be relied upon very heavily. +func siloNameFmt(containerID string) string { + return fmt.Sprintf(`\Container_%s`, containerID) +} + // CreateComputeSystem creates a new compute system with the given configuration but does not start it. func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface interface{}) (_ *System, err error) { operation := "hcs::CreateComputeSystem" @@ -127,6 +138,7 @@ func (computeSystem *System) getCachedProperties(ctx context.Context) error { } computeSystem.typ = strings.ToLower(props.SystemType) computeSystem.os = strings.ToLower(props.RuntimeOSType) + computeSystem.owner = strings.ToLower(props.Owner) if computeSystem.os == "" && computeSystem.typ == "container" { // Pre-RS5 HCS did not return the OS, but it only supported containers // that ran Windows. @@ -195,7 +207,7 @@ func (computeSystem *System) Start(ctx context.Context) (err error) { if err != nil { return makeSystemError(computeSystem, operation, err, events) } - + computeSystem.startTime = time.Now() return nil } @@ -324,11 +336,115 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr return properties, nil } -// PropertiesV2 returns the requested container properties targeting a V2 schema container. -func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) { - computeSystem.handleLock.RLock() - defer computeSystem.handleLock.RUnlock() +// queryInProc handles querying for container properties without reaching out to HCS. `props` +// will be updated to contain any data returned from the queries present in `types`. If any properties +// failed to be queried they will be tallied up and returned in as the first return value. Failures on +// query are NOT considered errors; the only failure case for this method is if the containers job object +// cannot be opened. +func (computeSystem *System) queryInProc(ctx context.Context, props *hcsschema.Properties, types []hcsschema.PropertyType) ([]hcsschema.PropertyType, error) { + // In the future we can make use of some new functionality in the HCS that allows you + // to pass a job object for HCS to use for the container. Currently, the only way we'll + // be able to open the job/silo is if we're running as SYSTEM. + jobOptions := &jobobject.Options{ + UseNTVariant: true, + Name: siloNameFmt(computeSystem.id), + } + job, err := jobobject.Open(ctx, jobOptions) + if err != nil { + return nil, err + } + defer job.Close() + + var fallbackQueryTypes []hcsschema.PropertyType + for _, propType := range types { + switch propType { + case hcsschema.PTStatistics: + // Handle a bad caller asking for the same type twice. No use in re-querying if this is + // filled in already. + if props.Statistics == nil { + props.Statistics, err = computeSystem.statisticsInProc(job) + if err != nil { + log.G(ctx).WithError(err).Warn("failed to get statistics in-proc") + + fallbackQueryTypes = append(fallbackQueryTypes, propType) + } + } + default: + fallbackQueryTypes = append(fallbackQueryTypes, propType) + } + } + + return fallbackQueryTypes, nil +} + +// statisticsInProc emulates what HCS does to grab statistics for a given container with a small +// change to make grabbing the private working set total much more efficient. +func (computeSystem *System) statisticsInProc(job *jobobject.JobObject) (*hcsschema.Statistics, error) { + // Start timestamp for these stats before we grab them to match HCS + timestamp := time.Now() + + memInfo, err := job.QueryMemoryStats() + if err != nil { + return nil, err + } + + processorInfo, err := job.QueryProcessorStats() + if err != nil { + return nil, err + } + + storageInfo, err := job.QueryStorageStats() + if err != nil { + return nil, err + } + + // This calculates the private working set more efficiently than HCS does. HCS calls NtQuerySystemInformation + // with the class SystemProcessInformation which returns an array containing system information for *every* + // process running on the machine. They then grab the pids that are running in the container and filter down + // the entries in the array to only what's running in that silo and start tallying up the total. This doesn't + // work well as performance should get worse if more processess are running on the machine in general and not + // just in the container. All of the additional information besides the WorkingSetPrivateSize field is ignored + // as well which isn't great and is wasted work to fetch. + // + // HCS only let's you grab statistics in an all or nothing fashion, so we can't just grab the private + // working set ourselves and ask for everything else seperately. The optimization we can make here is + // to open the silo ourselves and do the same queries for the rest of the info, as well as calculating + // the private working set in a more efficient manner by: + // + // 1. Find the pids running in the silo + // 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access) + // 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters + // 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2. + privateWorkingSet, err := job.QueryPrivateWorkingSet() + if err != nil { + return nil, err + } + return &hcsschema.Statistics{ + Timestamp: timestamp, + ContainerStartTime: computeSystem.startTime, + Uptime100ns: uint64(time.Since(computeSystem.startTime).Nanoseconds()) / 100, + Memory: &hcsschema.MemoryStats{ + MemoryUsageCommitBytes: memInfo.JobMemory, + MemoryUsageCommitPeakBytes: memInfo.PeakJobMemoryUsed, + MemoryUsagePrivateWorkingSetBytes: privateWorkingSet, + }, + Processor: &hcsschema.ProcessorStats{ + RuntimeKernel100ns: uint64(processorInfo.TotalKernelTime), + RuntimeUser100ns: uint64(processorInfo.TotalUserTime), + TotalRuntime100ns: uint64(processorInfo.TotalKernelTime + processorInfo.TotalUserTime), + }, + Storage: &hcsschema.StorageStats{ + ReadCountNormalized: uint64(storageInfo.ReadStats.IoCount), + ReadSizeBytes: storageInfo.ReadStats.TotalSize, + WriteCountNormalized: uint64(storageInfo.WriteStats.IoCount), + WriteSizeBytes: storageInfo.WriteStats.TotalSize, + }, + }, nil +} + +// hcsPropertiesV2Query is a helper to make a HcsGetComputeSystemProperties call using the V2 schema property types. +func (computeSystem *System) hcsPropertiesV2Query(ctx context.Context, types []hcsschema.PropertyType) (*hcsschema.Properties, error) { operation := "hcs::System::PropertiesV2" queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types}) @@ -345,12 +461,66 @@ func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschem if propertiesJSON == "" { return nil, ErrUnexpectedValue } - properties := &hcsschema.Properties{} - if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil { + props := &hcsschema.Properties{} + if err := json.Unmarshal([]byte(propertiesJSON), props); err != nil { return nil, makeSystemError(computeSystem, operation, err, nil) } - return properties, nil + return props, nil +} + +// PropertiesV2 returns the requested compute systems properties targeting a V2 schema compute system. +func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (_ *hcsschema.Properties, err error) { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + // Let HCS tally up the total for VM based queries instead of querying ourselves. + if computeSystem.typ != "container" { + return computeSystem.hcsPropertiesV2Query(ctx, types) + } + + // Define a starter Properties struct with the default fields returned from every + // query. Owner is only returned from Statistics but it's harmless to include. + properties := &hcsschema.Properties{ + Id: computeSystem.id, + SystemType: computeSystem.typ, + RuntimeOsType: computeSystem.os, + Owner: computeSystem.owner, + } + + logEntry := log.G(ctx) + // First lets try and query ourselves without reaching to HCS. If any of the queries fail + // we'll take note and fallback to querying HCS for any of the failed types. + fallbackTypes, err := computeSystem.queryInProc(ctx, properties, types) + if err == nil && len(fallbackTypes) == 0 { + return properties, nil + } else if err != nil { + logEntry.WithError(fmt.Errorf("failed to query compute system properties in-proc: %w", err)) + fallbackTypes = types + } + + logEntry.WithFields(logrus.Fields{ + logfields.ContainerID: computeSystem.id, + "propertyTypes": fallbackTypes, + }).Info("falling back to HCS for property type queries") + + hcsProperties, err := computeSystem.hcsPropertiesV2Query(ctx, fallbackTypes) + if err != nil { + return nil, err + } + + // Now add in anything that we might have successfully queried in process. + if properties.Statistics != nil { + hcsProperties.Statistics = properties.Statistics + hcsProperties.Owner = properties.Owner + } + + // For future support for querying processlist in-proc as well. + if properties.ProcessList != nil { + hcsProperties.ProcessList = properties.ProcessList + } + + return hcsProperties, nil } // Pause pauses the execution of the computeSystem. This feature is not enabled in TP5. |