summaryrefslogtreecommitdiff
path: root/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go
blob: 9c2726416418b274865a7ffe085b1821ec15c5a6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
package jobobject

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"unsafe"

	"github.com/Microsoft/hcsshim/internal/queue"
	"github.com/Microsoft/hcsshim/internal/winapi"
	"golang.org/x/sys/windows"
)

// This file provides higher level constructs for the win32 job object API.
// Most of the core creation and management functions are already present in "golang.org/x/sys/windows"
// (CreateJobObject, AssignProcessToJobObject, etc.) as well as most of the limit information
// structs and associated limit flags. Whatever is not present from the job object API
// in golang.org/x/sys/windows is located in /internal/winapi.
//
// https://docs.microsoft.com/en-us/windows/win32/procthread/job-objects

// JobObject is a high level wrapper around a Windows job object. Holds a handle to
// the job, a queue to receive iocp notifications about the lifecycle
// of the job and a mutex for synchronized handle access.
type JobObject struct {
	handle     windows.Handle
	mq         *queue.MessageQueue
	handleLock sync.RWMutex
}

// JobLimits represents the resource constraints that can be applied to a job object.
type JobLimits struct {
	CPULimit           uint32
	CPUWeight          uint32
	MemoryLimitInBytes uint64
	MaxIOPS            int64
	MaxBandwidth       int64
}

type CPURateControlType uint32

const (
	WeightBased CPURateControlType = iota
	RateBased
)

// Processor resource controls
const (
	cpuLimitMin  = 1
	cpuLimitMax  = 10000
	cpuWeightMin = 1
	cpuWeightMax = 9
)

var (
	ErrAlreadyClosed = errors.New("the handle has already been closed")
	ErrNotRegistered = errors.New("job is not registered to receive notifications")
)

// Options represents the set of configurable options when making or opening a job object.
type Options struct {
	// `Name` specifies the name of the job object if a named job object is desired.
	Name string
	// `Notifications` specifies if the job will be registered to receive notifications.
	// Defaults to false.
	Notifications bool
	// `UseNTVariant` specifies if we should use the `Nt` variant of Open/CreateJobObject.
	// Defaults to false.
	UseNTVariant bool
}

// Create creates a job object.
//
// If options.Name is an empty string, the job will not be assigned a name.
//
// If options.Notifications are not enabled `PollNotifications` will return immediately with error `errNotRegistered`.
//
// If `options` is nil, use default option values.
//
// Returns a JobObject structure and an error if there is one.
func Create(ctx context.Context, options *Options) (_ *JobObject, err error) {
	if options == nil {
		options = &Options{}
	}

	var jobName *winapi.UnicodeString
	if options.Name != "" {
		jobName, err = winapi.NewUnicodeString(options.Name)
		if err != nil {
			return nil, err
		}
	}

	var jobHandle windows.Handle
	if options.UseNTVariant {
		oa := winapi.ObjectAttributes{
			Length:     unsafe.Sizeof(winapi.ObjectAttributes{}),
			ObjectName: jobName,
			Attributes: 0,
		}
		status := winapi.NtCreateJobObject(&jobHandle, winapi.JOB_OBJECT_ALL_ACCESS, &oa)
		if status != 0 {
			return nil, winapi.RtlNtStatusToDosError(status)
		}
	} else {
		var jobNameBuf *uint16
		if jobName != nil && jobName.Buffer != nil {
			jobNameBuf = jobName.Buffer
		}
		jobHandle, err = windows.CreateJobObject(nil, jobNameBuf)
		if err != nil {
			return nil, err
		}
	}

	defer func() {
		if err != nil {
			windows.Close(jobHandle)
		}
	}()

	job := &JobObject{
		handle: jobHandle,
	}

	// If the IOCP we'll be using to receive messages for all jobs hasn't been
	// created, create it and start polling.
	if options.Notifications {
		mq, err := setupNotifications(ctx, job)
		if err != nil {
			return nil, err
		}
		job.mq = mq
	}

	return job, nil
}

// Open opens an existing job object with name provided in `options`. If no name is provided
// return an error since we need to know what job object to open.
//
// If options.Notifications is false `PollNotifications` will return immediately with error `errNotRegistered`.
//
// Returns a JobObject structure and an error if there is one.
func Open(ctx context.Context, options *Options) (_ *JobObject, err error) {
	if options == nil || (options != nil && options.Name == "") {
		return nil, errors.New("no job object name specified to open")
	}

	unicodeJobName, err := winapi.NewUnicodeString(options.Name)
	if err != nil {
		return nil, err
	}

	var jobHandle windows.Handle
	if options != nil && options.UseNTVariant {
		oa := winapi.ObjectAttributes{
			Length:     unsafe.Sizeof(winapi.ObjectAttributes{}),
			ObjectName: unicodeJobName,
			Attributes: 0,
		}
		status := winapi.NtOpenJobObject(&jobHandle, winapi.JOB_OBJECT_ALL_ACCESS, &oa)
		if status != 0 {
			return nil, winapi.RtlNtStatusToDosError(status)
		}
	} else {
		jobHandle, err = winapi.OpenJobObject(winapi.JOB_OBJECT_ALL_ACCESS, false, unicodeJobName.Buffer)
		if err != nil {
			return nil, err
		}
	}

	defer func() {
		if err != nil {
			windows.Close(jobHandle)
		}
	}()

	job := &JobObject{
		handle: jobHandle,
	}

	// If the IOCP we'll be using to receive messages for all jobs hasn't been
	// created, create it and start polling.
	if options != nil && options.Notifications {
		mq, err := setupNotifications(ctx, job)
		if err != nil {
			return nil, err
		}
		job.mq = mq
	}

	return job, nil
}

// helper function to setup notifications for creating/opening a job object
func setupNotifications(ctx context.Context, job *JobObject) (*queue.MessageQueue, error) {
	job.handleLock.RLock()
	defer job.handleLock.RUnlock()

	if job.handle == 0 {
		return nil, ErrAlreadyClosed
	}

	ioInitOnce.Do(func() {
		h, err := windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0xffffffff)
		if err != nil {
			initIOErr = err
			return
		}
		ioCompletionPort = h
		go pollIOCP(ctx, h)
	})

	if initIOErr != nil {
		return nil, initIOErr
	}

	mq := queue.NewMessageQueue()
	jobMap.Store(uintptr(job.handle), mq)
	if err := attachIOCP(job.handle, ioCompletionPort); err != nil {
		jobMap.Delete(uintptr(job.handle))
		return nil, fmt.Errorf("failed to attach job to IO completion port: %w", err)
	}
	return mq, nil
}

// PollNotification will poll for a job object notification. This call should only be called once
// per job (ideally in a goroutine loop) and will block if there is not a notification ready.
// This call will return immediately with error `ErrNotRegistered` if the job was not registered
// to receive notifications during `Create`. Internally, messages will be queued and there
// is no worry of messages being dropped.
func (job *JobObject) PollNotification() (interface{}, error) {
	if job.mq == nil {
		return nil, ErrNotRegistered
	}
	return job.mq.ReadOrWait()
}

// UpdateProcThreadAttribute updates the passed in ProcThreadAttributeList to contain what is necessary to
// launch a process in a job at creation time. This can be used to avoid having to call Assign() after a process
// has already started running.
func (job *JobObject) UpdateProcThreadAttribute(attrList *windows.ProcThreadAttributeListContainer) error {
	job.handleLock.RLock()
	defer job.handleLock.RUnlock()

	if job.handle == 0 {
		return ErrAlreadyClosed
	}

	if err := attrList.Update(
		winapi.PROC_THREAD_ATTRIBUTE_JOB_LIST,
		unsafe.Pointer(&job.handle),
		unsafe.Sizeof(job.handle),
	); err != nil {
		return fmt.Errorf("failed to update proc thread attributes for job object: %w", err)
	}

	return nil
}

// Close closes the job object handle.
func (job *JobObject) Close() error {
	job.handleLock.Lock()
	defer job.handleLock.Unlock()

	if job.handle == 0 {
		return ErrAlreadyClosed
	}

	if err := windows.Close(job.handle); err != nil {
		return err
	}

	if job.mq != nil {
		job.mq.Close()
	}
	// Handles now invalid so if the map entry to receive notifications for this job still
	// exists remove it so we can stop receiving notifications.
	if _, ok := jobMap.Load(uintptr(job.handle)); ok {
		jobMap.Delete(uintptr(job.handle))
	}

	job.handle = 0
	return nil
}

// Assign assigns a process to the job object.
func (job *JobObject) Assign(pid uint32) error {
	job.handleLock.RLock()
	defer job.handleLock.RUnlock()

	if job.handle == 0 {
		return ErrAlreadyClosed
	}

	if pid == 0 {
		return errors.New("invalid pid: 0")
	}
	hProc, err := windows.OpenProcess(winapi.PROCESS_ALL_ACCESS, true, pid)
	if err != nil {
		return err
	}
	defer windows.Close(hProc)
	return windows.AssignProcessToJobObject(job.handle, hProc)
}

// Terminate terminates the job, essentially calls TerminateProcess on every process in the
// job.
func (job *JobObject) Terminate(exitCode uint32) error {
	job.handleLock.RLock()
	defer job.handleLock.RUnlock()
	if job.handle == 0 {
		return ErrAlreadyClosed
	}
	return windows.TerminateJobObject(job.handle, exitCode)
}

// Pids returns all of the process IDs in the job object.
func (job *JobObject) Pids() ([]uint32, error) {
	job.handleLock.RLock()
	defer job.handleLock.RUnlock()

	if job.handle == 0 {
		return nil, ErrAlreadyClosed
	}

	info := winapi.JOBOBJECT_BASIC_PROCESS_ID_LIST{}
	err := winapi.QueryInformationJobObject(
		job.handle,
		winapi.JobObjectBasicProcessIdList,
		uintptr(unsafe.Pointer(&info)),
		uint32(unsafe.Sizeof(info)),
		nil,
	)

	// This is either the case where there is only one process or no processes in
	// the job. Any other case will result in ERROR_MORE_DATA. Check if info.NumberOfProcessIdsInList
	// is 1 and just return this, otherwise return an empty slice.
	if err == nil {
		if info.NumberOfProcessIdsInList == 1 {
			return []uint32{uint32(info.ProcessIdList[0])}, nil
		}
		// Return empty slice instead of nil to play well with the caller of this.
		// Do not return an error if no processes are running inside the job
		return []uint32{}, nil
	}

	if err != winapi.ERROR_MORE_DATA {
		return nil, fmt.Errorf("failed initial query for PIDs in job object: %w", err)
	}

	jobBasicProcessIDListSize := unsafe.Sizeof(info) + (unsafe.Sizeof(info.ProcessIdList[0]) * uintptr(info.NumberOfAssignedProcesses-1))
	buf := make([]byte, jobBasicProcessIDListSize)
	if err = winapi.QueryInformationJobObject(
		job.handle,
		winapi.JobObjectBasicProcessIdList,
		uintptr(unsafe.Pointer(&buf[0])),
		uint32(len(buf)),
		nil,
	); err != nil {
		return nil, fmt.Errorf("failed to query for PIDs in job object: %w", err)
	}

	bufInfo := (*winapi.JOBOBJECT_BASIC_PROCESS_ID_LIST)(unsafe.Pointer(&buf[0]))
	pids := make([]uint32, bufInfo.NumberOfProcessIdsInList)
	for i, bufPid := range bufInfo.AllPids() {
		pids[i] = uint32(bufPid)
	}
	return pids, nil
}

// QueryMemoryStats gets the memory stats for the job object.
func (job *JobObject) QueryMemoryStats() (*winapi.JOBOBJECT_MEMORY_USAGE_INFORMATION, error) {
	job.handleLock.RLock()
	defer job.handleLock.RUnlock()

	if job.handle == 0 {
		return nil, ErrAlreadyClosed
	}

	info := winapi.JOBOBJECT_MEMORY_USAGE_INFORMATION{}
	if err := winapi.QueryInformationJobObject(
		job.handle,
		winapi.JobObjectMemoryUsageInformation,
		uintptr(unsafe.Pointer(&info)),
		uint32(unsafe.Sizeof(info)),
		nil,
	); err != nil {
		return nil, fmt.Errorf("failed to query for job object memory stats: %w", err)
	}
	return &info, nil
}

// QueryProcessorStats gets the processor stats for the job object.
func (job *JobObject) QueryProcessorStats() (*winapi.JOBOBJECT_BASIC_ACCOUNTING_INFORMATION, error) {
	job.handleLock.RLock()
	defer job.handleLock.RUnlock()

	if job.handle == 0 {
		return nil, ErrAlreadyClosed
	}

	info := winapi.JOBOBJECT_BASIC_ACCOUNTING_INFORMATION{}
	if err := winapi.QueryInformationJobObject(
		job.handle,
		winapi.JobObjectBasicAccountingInformation,
		uintptr(unsafe.Pointer(&info)),
		uint32(unsafe.Sizeof(info)),
		nil,
	); err != nil {
		return nil, fmt.Errorf("failed to query for job object process stats: %w", err)
	}
	return &info, nil
}

// QueryStorageStats gets the storage (I/O) stats for the job object.
func (job *JobObject) QueryStorageStats() (*winapi.JOBOBJECT_IO_ATTRIBUTION_INFORMATION, error) {
	job.handleLock.RLock()
	defer job.handleLock.RUnlock()

	if job.handle == 0 {
		return nil, ErrAlreadyClosed
	}

	info := winapi.JOBOBJECT_IO_ATTRIBUTION_INFORMATION{
		ControlFlags: winapi.JOBOBJECT_IO_ATTRIBUTION_CONTROL_ENABLE,
	}
	if err := winapi.QueryInformationJobObject(
		job.handle,
		winapi.JobObjectIoAttribution,
		uintptr(unsafe.Pointer(&info)),
		uint32(unsafe.Sizeof(info)),
		nil,
	); err != nil {
		return nil, fmt.Errorf("failed to query for job object storage stats: %w", err)
	}
	return &info, nil
}

// QueryPrivateWorkingSet returns the private working set size for the job. This is calculated by adding up the
// private working set for every process running in the job.
func (job *JobObject) QueryPrivateWorkingSet() (uint64, error) {
	pids, err := job.Pids()
	if err != nil {
		return 0, err
	}

	openAndQueryWorkingSet := func(pid uint32) (uint64, error) {
		h, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, pid)
		if err != nil {
			// Continue to the next if OpenProcess doesn't return a valid handle (fails). Handles a
			// case where one of the pids in the job exited before we open.
			return 0, nil
		}
		defer func() {
			_ = windows.Close(h)
		}()
		// Check if the process is actually running in the job still. There's a small chance
		// that the process could have exited and had its pid re-used between grabbing the pids
		// in the job and opening the handle to it above.
		var inJob int32
		if err := winapi.IsProcessInJob(h, job.handle, &inJob); err != nil {
			// This shouldn't fail unless we have incorrect access rights which we control
			// here so probably best to error out if this failed.
			return 0, err
		}
		// Don't report stats for this process as it's not running in the job. This shouldn't be
		// an error condition though.
		if inJob == 0 {
			return 0, nil
		}

		var vmCounters winapi.VM_COUNTERS_EX2
		status := winapi.NtQueryInformationProcess(
			h,
			winapi.ProcessVmCounters,
			uintptr(unsafe.Pointer(&vmCounters)),
			uint32(unsafe.Sizeof(vmCounters)),
			nil,
		)
		if !winapi.NTSuccess(status) {
			return 0, fmt.Errorf("failed to query information for process: %w", winapi.RtlNtStatusToDosError(status))
		}
		return uint64(vmCounters.PrivateWorkingSetSize), nil
	}

	var jobWorkingSetSize uint64
	for _, pid := range pids {
		workingSet, err := openAndQueryWorkingSet(pid)
		if err != nil {
			return 0, err
		}
		jobWorkingSetSize += workingSet
	}

	return jobWorkingSetSize, nil
}