diff options
Diffstat (limited to 'vendor/github.com/opencontainers')
162 files changed, 23667 insertions, 0 deletions
diff --git a/vendor/github.com/opencontainers/go-digest/LICENSE.code b/vendor/github.com/opencontainers/go-digest/LICENSE.code new file mode 100644 index 000000000..0ea3ff81e --- /dev/null +++ b/vendor/github.com/opencontainers/go-digest/LICENSE.code @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2016 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/go-digest/LICENSE.docs b/vendor/github.com/opencontainers/go-digest/LICENSE.docs new file mode 100644 index 000000000..e26cd4fc8 --- /dev/null +++ b/vendor/github.com/opencontainers/go-digest/LICENSE.docs @@ -0,0 +1,425 @@ +Attribution-ShareAlike 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution-ShareAlike 4.0 International Public +License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution-ShareAlike 4.0 International Public License ("Public +License"). To the extent this Public License may be interpreted as a +contract, You are granted the Licensed Rights in consideration of Your +acceptance of these terms and conditions, and the Licensor grants You +such rights in consideration of benefits the Licensor receives from +making the Licensed Material available under these terms and +conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. BY-SA Compatible License means a license listed at + creativecommons.org/compatiblelicenses, approved by Creative + Commons as essentially the equivalent of this Public License. + + d. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + e. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + f. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + g. License Elements means the license attributes listed in the name + of a Creative Commons Public License. The License Elements of this + Public License are Attribution and ShareAlike. + + h. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + i. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + j. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + k. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + l. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + m. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. Additional offer from the Licensor -- Adapted Material. + Every recipient of Adapted Material from You + automatically receives an offer from the Licensor to + exercise the Licensed Rights in the Adapted Material + under the conditions of the Adapter's License You apply. + + c. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + b. ShareAlike. + + In addition to the conditions in Section 3(a), if You Share + Adapted Material You produce, the following conditions also apply. + + 1. The Adapter's License You apply must be a Creative Commons + license with the same License Elements, this version or + later, or a BY-SA Compatible License. + + 2. You must include the text of, or the URI or hyperlink to, the + Adapter's License You apply. You may satisfy this condition + in any reasonable manner based on the medium, means, and + context in which You Share Adapted Material. + + 3. You may not offer or impose any additional or different terms + or conditions on, or apply any Effective Technological + Measures to, Adapted Material that restrict exercise of the + rights granted under the Adapter's License You apply. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material, + + including for purposes of Section 3(b); and + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public licenses. +Notwithstanding, Creative Commons may elect to apply one of its public +licenses to material it publishes and in those instances will be +considered the "Licensor." Except for the limited purpose of indicating +that material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the public +licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/vendor/github.com/opencontainers/go-digest/README.md b/vendor/github.com/opencontainers/go-digest/README.md new file mode 100644 index 000000000..0f5a04092 --- /dev/null +++ b/vendor/github.com/opencontainers/go-digest/README.md @@ -0,0 +1,104 @@ +# go-digest + +[![GoDoc](https://godoc.org/github.com/opencontainers/go-digest?status.svg)](https://godoc.org/github.com/opencontainers/go-digest) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/go-digest)](https://goreportcard.com/report/github.com/opencontainers/go-digest) [![Build Status](https://travis-ci.org/opencontainers/go-digest.svg?branch=master)](https://travis-ci.org/opencontainers/go-digest) + +Common digest package used across the container ecosystem. + +Please see the [godoc](https://godoc.org/github.com/opencontainers/go-digest) for more information. + +# What is a digest? + +A digest is just a hash. + +The most common use case for a digest is to create a content +identifier for use in [Content Addressable Storage](https://en.wikipedia.org/wiki/Content-addressable_storage) +systems: + +```go +id := digest.FromBytes([]byte("my content")) +``` + +In the example above, the id can be used to uniquely identify +the byte slice "my content". This allows two disparate applications +to agree on a verifiable identifier without having to trust one +another. + +An identifying digest can be verified, as follows: + +```go +if id != digest.FromBytes([]byte("my content")) { + return errors.New("the content has changed!") +} +``` + +A `Verifier` type can be used to handle cases where an `io.Reader` +makes more sense: + +```go +rd := getContent() +verifier := id.Verifier() +io.Copy(verifier, rd) + +if !verifier.Verified() { + return errors.New("the content has changed!") +} +``` + +Using [Merkle DAGs](https://en.wikipedia.org/wiki/Merkle_tree), this +can power a rich, safe, content distribution system. + +# Usage + +While the [godoc](https://godoc.org/github.com/opencontainers/go-digest) is +considered the best resource, a few important items need to be called +out when using this package. + +1. Make sure to import the hash implementations into your application + or the package will panic. You should have something like the + following in the main (or other entrypoint) of your application: + + ```go + import ( + _ "crypto/sha256" + _ "crypto/sha512" + ) + ``` + This may seem inconvenient but it allows you replace the hash + implementations with others, such as https://github.com/stevvooe/resumable. + +2. Even though `digest.Digest` may be assemable as a string, _always_ + verify your input with `digest.Parse` or use `Digest.Validate` + when accepting untrusted input. While there are measures to + avoid common problems, this will ensure you have valid digests + in the rest of your application. + +# Stability + +The Go API, at this stage, is considered stable, unless otherwise noted. + +As always, before using a package export, read the [godoc](https://godoc.org/github.com/opencontainers/go-digest). + +# Contributing + +This package is considered fairly complete. It has been in production +in thousands (millions?) of deployments and is fairly battle-hardened. +New additions will be met with skepticism. If you think there is a +missing feature, please file a bug clearly describing the problem and +the alternatives you tried before submitting a PR. + +# Reporting security issues + +Please DO NOT file a public issue, instead send your report privately to +security@opencontainers.org. + +The maintainers take security seriously. If you discover a security issue, +please bring it to their attention right away! + +If you are reporting a security issue, do not create an issue or file a pull +request on GitHub. Instead, disclose the issue responsibly by sending an email +to security@opencontainers.org (which is inhabited only by the maintainers of +the various OCI projects). + +# Copyright and license + +Copyright © 2016 Docker, Inc. All rights reserved, except as follows. Code is released under the [Apache 2.0 license](LICENSE.code). This `README.md` file and the [`CONTRIBUTING.md`](CONTRIBUTING.md) file are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file [`LICENSE.docs`](LICENSE.docs). You may obtain a duplicate copy of the same license, titled CC BY-SA 4.0, at http://creativecommons.org/licenses/by-sa/4.0/. diff --git a/vendor/github.com/opencontainers/go-digest/algorithm.go b/vendor/github.com/opencontainers/go-digest/algorithm.go new file mode 100644 index 000000000..bdff42d92 --- /dev/null +++ b/vendor/github.com/opencontainers/go-digest/algorithm.go @@ -0,0 +1,158 @@ +// Copyright 2017 Docker, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package digest + +import ( + "crypto" + "fmt" + "hash" + "io" +) + +// Algorithm identifies and implementation of a digester by an identifier. +// Note the that this defines both the hash algorithm used and the string +// encoding. +type Algorithm string + +// supported digest types +const ( + SHA256 Algorithm = "sha256" // sha256 with hex encoding + SHA384 Algorithm = "sha384" // sha384 with hex encoding + SHA512 Algorithm = "sha512" // sha512 with hex encoding + + // Canonical is the primary digest algorithm used with the distribution + // project. Other digests may be used but this one is the primary storage + // digest. + Canonical = SHA256 +) + +var ( + // TODO(stevvooe): Follow the pattern of the standard crypto package for + // registration of digests. Effectively, we are a registerable set and + // common symbol access. + + // algorithms maps values to hash.Hash implementations. Other algorithms + // may be available but they cannot be calculated by the digest package. + algorithms = map[Algorithm]crypto.Hash{ + SHA256: crypto.SHA256, + SHA384: crypto.SHA384, + SHA512: crypto.SHA512, + } +) + +// Available returns true if the digest type is available for use. If this +// returns false, Digester and Hash will return nil. +func (a Algorithm) Available() bool { + h, ok := algorithms[a] + if !ok { + return false + } + + // check availability of the hash, as well + return h.Available() +} + +func (a Algorithm) String() string { + return string(a) +} + +// Size returns number of bytes returned by the hash. +func (a Algorithm) Size() int { + h, ok := algorithms[a] + if !ok { + return 0 + } + return h.Size() +} + +// Set implemented to allow use of Algorithm as a command line flag. +func (a *Algorithm) Set(value string) error { + if value == "" { + *a = Canonical + } else { + // just do a type conversion, support is queried with Available. + *a = Algorithm(value) + } + + if !a.Available() { + return ErrDigestUnsupported + } + + return nil +} + +// Digester returns a new digester for the specified algorithm. If the algorithm +// does not have a digester implementation, nil will be returned. This can be +// checked by calling Available before calling Digester. +func (a Algorithm) Digester() Digester { + return &digester{ + alg: a, + hash: a.Hash(), + } +} + +// Hash returns a new hash as used by the algorithm. If not available, the +// method will panic. Check Algorithm.Available() before calling. +func (a Algorithm) Hash() hash.Hash { + if !a.Available() { + // Empty algorithm string is invalid + if a == "" { + panic(fmt.Sprintf("empty digest algorithm, validate before calling Algorithm.Hash()")) + } + + // NOTE(stevvooe): A missing hash is usually a programming error that + // must be resolved at compile time. We don't import in the digest + // package to allow users to choose their hash implementation (such as + // when using stevvooe/resumable or a hardware accelerated package). + // + // Applications that may want to resolve the hash at runtime should + // call Algorithm.Available before call Algorithm.Hash(). + panic(fmt.Sprintf("%v not available (make sure it is imported)", a)) + } + + return algorithms[a].New() +} + +// FromReader returns the digest of the reader using the algorithm. +func (a Algorithm) FromReader(rd io.Reader) (Digest, error) { + digester := a.Digester() + + if _, err := io.Copy(digester.Hash(), rd); err != nil { + return "", err + } + + return digester.Digest(), nil +} + +// FromBytes digests the input and returns a Digest. +func (a Algorithm) FromBytes(p []byte) Digest { + digester := a.Digester() + + if _, err := digester.Hash().Write(p); err != nil { + // Writes to a Hash should never fail. None of the existing + // hash implementations in the stdlib or hashes vendored + // here can return errors from Write. Having a panic in this + // condition instead of having FromBytes return an error value + // avoids unnecessary error handling paths in all callers. + panic("write to hash function returned error: " + err.Error()) + } + + return digester.Digest() +} + +// FromString digests the string input and returns a Digest. +func (a Algorithm) FromString(s string) Digest { + return a.FromBytes([]byte(s)) +} diff --git a/vendor/github.com/opencontainers/go-digest/digest.go b/vendor/github.com/opencontainers/go-digest/digest.go new file mode 100644 index 000000000..69e1d2b54 --- /dev/null +++ b/vendor/github.com/opencontainers/go-digest/digest.go @@ -0,0 +1,154 @@ +// Copyright 2017 Docker, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package digest + +import ( + "fmt" + "hash" + "io" + "regexp" + "strings" +) + +// Digest allows simple protection of hex formatted digest strings, prefixed +// by their algorithm. Strings of type Digest have some guarantee of being in +// the correct format and it provides quick access to the components of a +// digest string. +// +// The following is an example of the contents of Digest types: +// +// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc +// +// This allows to abstract the digest behind this type and work only in those +// terms. +type Digest string + +// NewDigest returns a Digest from alg and a hash.Hash object. +func NewDigest(alg Algorithm, h hash.Hash) Digest { + return NewDigestFromBytes(alg, h.Sum(nil)) +} + +// NewDigestFromBytes returns a new digest from the byte contents of p. +// Typically, this can come from hash.Hash.Sum(...) or xxx.SumXXX(...) +// functions. This is also useful for rebuilding digests from binary +// serializations. +func NewDigestFromBytes(alg Algorithm, p []byte) Digest { + return Digest(fmt.Sprintf("%s:%x", alg, p)) +} + +// NewDigestFromHex returns a Digest from alg and a the hex encoded digest. +func NewDigestFromHex(alg, hex string) Digest { + return Digest(fmt.Sprintf("%s:%s", alg, hex)) +} + +// DigestRegexp matches valid digest types. +var DigestRegexp = regexp.MustCompile(`[a-zA-Z0-9-_+.]+:[a-fA-F0-9]+`) + +// DigestRegexpAnchored matches valid digest types, anchored to the start and end of the match. +var DigestRegexpAnchored = regexp.MustCompile(`^` + DigestRegexp.String() + `$`) + +var ( + // ErrDigestInvalidFormat returned when digest format invalid. + ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format") + + // ErrDigestInvalidLength returned when digest has invalid length. + ErrDigestInvalidLength = fmt.Errorf("invalid checksum digest length") + + // ErrDigestUnsupported returned when the digest algorithm is unsupported. + ErrDigestUnsupported = fmt.Errorf("unsupported digest algorithm") +) + +// Parse parses s and returns the validated digest object. An error will +// be returned if the format is invalid. +func Parse(s string) (Digest, error) { + d := Digest(s) + return d, d.Validate() +} + +// FromReader consumes the content of rd until io.EOF, returning canonical digest. +func FromReader(rd io.Reader) (Digest, error) { + return Canonical.FromReader(rd) +} + +// FromBytes digests the input and returns a Digest. +func FromBytes(p []byte) Digest { + return Canonical.FromBytes(p) +} + +// FromString digests the input and returns a Digest. +func FromString(s string) Digest { + return Canonical.FromString(s) +} + +// Validate checks that the contents of d is a valid digest, returning an +// error if not. +func (d Digest) Validate() error { + s := string(d) + + i := strings.Index(s, ":") + + // validate i then run through regexp + if i < 0 || i+1 == len(s) || !DigestRegexpAnchored.MatchString(s) { + return ErrDigestInvalidFormat + } + + algorithm := Algorithm(s[:i]) + if !algorithm.Available() { + return ErrDigestUnsupported + } + + // Digests much always be hex-encoded, ensuring that their hex portion will + // always be size*2 + if algorithm.Size()*2 != len(s[i+1:]) { + return ErrDigestInvalidLength + } + + return nil +} + +// Algorithm returns the algorithm portion of the digest. This will panic if +// the underlying digest is not in a valid format. +func (d Digest) Algorithm() Algorithm { + return Algorithm(d[:d.sepIndex()]) +} + +// Verifier returns a writer object that can be used to verify a stream of +// content against the digest. If the digest is invalid, the method will panic. +func (d Digest) Verifier() Verifier { + return hashVerifier{ + hash: d.Algorithm().Hash(), + digest: d, + } +} + +// Hex returns the hex digest portion of the digest. This will panic if the +// underlying digest is not in a valid format. +func (d Digest) Hex() string { + return string(d[d.sepIndex()+1:]) +} + +func (d Digest) String() string { + return string(d) +} + +func (d Digest) sepIndex() int { + i := strings.Index(string(d), ":") + + if i < 0 { + panic(fmt.Sprintf("no ':' separator in digest %q", d)) + } + + return i +} diff --git a/vendor/github.com/opencontainers/go-digest/digester.go b/vendor/github.com/opencontainers/go-digest/digester.go new file mode 100644 index 000000000..36fa2728e --- /dev/null +++ b/vendor/github.com/opencontainers/go-digest/digester.go @@ -0,0 +1,39 @@ +// Copyright 2017 Docker, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package digest + +import "hash" + +// Digester calculates the digest of written data. Writes should go directly +// to the return value of Hash, while calling Digest will return the current +// value of the digest. +type Digester interface { + Hash() hash.Hash // provides direct access to underlying hash instance. + Digest() Digest +} + +// digester provides a simple digester definition that embeds a hasher. +type digester struct { + alg Algorithm + hash hash.Hash +} + +func (d *digester) Hash() hash.Hash { + return d.hash +} + +func (d *digester) Digest() Digest { + return NewDigest(d.alg, d.hash) +} diff --git a/vendor/github.com/opencontainers/go-digest/doc.go b/vendor/github.com/opencontainers/go-digest/doc.go new file mode 100644 index 000000000..491ea1ef1 --- /dev/null +++ b/vendor/github.com/opencontainers/go-digest/doc.go @@ -0,0 +1,56 @@ +// Copyright 2017 Docker, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package digest provides a generalized type to opaquely represent message +// digests and their operations within the registry. The Digest type is +// designed to serve as a flexible identifier in a content-addressable system. +// More importantly, it provides tools and wrappers to work with +// hash.Hash-based digests with little effort. +// +// Basics +// +// The format of a digest is simply a string with two parts, dubbed the +// "algorithm" and the "digest", separated by a colon: +// +// <algorithm>:<digest> +// +// An example of a sha256 digest representation follows: +// +// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc +// +// In this case, the string "sha256" is the algorithm and the hex bytes are +// the "digest". +// +// Because the Digest type is simply a string, once a valid Digest is +// obtained, comparisons are cheap, quick and simple to express with the +// standard equality operator. +// +// Verification +// +// The main benefit of using the Digest type is simple verification against a +// given digest. The Verifier interface, modeled after the stdlib hash.Hash +// interface, provides a common write sink for digest verification. After +// writing is complete, calling the Verifier.Verified method will indicate +// whether or not the stream of bytes matches the target digest. +// +// Missing Features +// +// In addition to the above, we intend to add the following features to this +// package: +// +// 1. A Digester type that supports write sink digest calculation. +// +// 2. Suspend and resume of ongoing digest calculations to support efficient digest verification in the registry. +// +package digest diff --git a/vendor/github.com/opencontainers/go-digest/verifiers.go b/vendor/github.com/opencontainers/go-digest/verifiers.go new file mode 100644 index 000000000..32125e918 --- /dev/null +++ b/vendor/github.com/opencontainers/go-digest/verifiers.go @@ -0,0 +1,45 @@ +// Copyright 2017 Docker, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package digest + +import ( + "hash" + "io" +) + +// Verifier presents a general verification interface to be used with message +// digests and other byte stream verifications. Users instantiate a Verifier +// from one of the various methods, write the data under test to it then check +// the result with the Verified method. +type Verifier interface { + io.Writer + + // Verified will return true if the content written to Verifier matches + // the digest. + Verified() bool +} + +type hashVerifier struct { + digest Digest + hash hash.Hash +} + +func (hv hashVerifier) Write(p []byte) (n int, err error) { + return hv.hash.Write(p) +} + +func (hv hashVerifier) Verified() bool { + return hv.digest == NewDigest(hv.digest.Algorithm(), hv.hash) +} diff --git a/vendor/github.com/opencontainers/image-spec/LICENSE b/vendor/github.com/opencontainers/image-spec/LICENSE new file mode 100644 index 000000000..9fdc20fdb --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2016 The Linux Foundation. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/image-spec/README.md b/vendor/github.com/opencontainers/image-spec/README.md new file mode 100644 index 000000000..5ab5554e4 --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/README.md @@ -0,0 +1,167 @@ +# OCI Image Format Specification +<div> +<a href="https://travis-ci.org/opencontainers/image-spec"> +<img src="https://travis-ci.org/opencontainers/image-spec.svg?branch=master"></img> +</a> +</div> + +The OCI Image Format project creates and maintains the software shipping container image format spec (OCI Image Format). + +**[The specification can be found here](spec.md).** + +This repository also provides [Go types](specs-go), [intra-blob validation tooling, and JSON Schema](schema). +The Go types and validation should be compatible with the current Go release; earlier Go releases are not supported. + +Additional documentation about how this group operates: + +- [Code of Conduct](https://github.com/opencontainers/tob/blob/d2f9d68c1332870e40693fe077d311e0742bc73d/code-of-conduct.md) +- [Roadmap](#roadmap) +- [Releases](RELEASES.md) +- [Project Documentation](project.md) + +The _optional_ and _base_ layers of all OCI projects are tracked in the [OCI Scope Table](https://www.opencontainers.org/about/oci-scope-table). + +## Running an OCI Image + +The OCI Image Format partner project is the [OCI Runtime Spec project](https://github.com/opencontainers/runtime-spec). +The Runtime Specification outlines how to run a "[filesystem bundle](https://github.com/opencontainers/runtime-spec/blob/master/bundle.md)" that is unpacked on disk. +At a high-level an OCI implementation would download an OCI Image then unpack that image into an OCI Runtime filesystem bundle. +At this point the OCI Runtime Bundle would be run by an OCI Runtime. + +This entire workflow supports the UX that users have come to expect from container engines like Docker and rkt: primarily, the ability to run an image with no additional arguments: + +* docker run example.com/org/app:v1.0.0 +* rkt run example.com/org/app,version=v1.0.0 + +To support this UX the OCI Image Format contains sufficient information to launch the application on the target platform (e.g. command, arguments, environment variables, etc). + +## FAQ + +**Q: Why doesn't this project mention distribution?** + +A: Distribution, for example using HTTP as both Docker v2.2 and AppC do today, is currently out of scope on the [OCI Scope Table](https://www.opencontainers.org/about/oci-scope-table). +There has been [some discussion on the TOB mailing list](https://groups.google.com/a/opencontainers.org/d/msg/tob/A3JnmI-D-6Y/tLuptPDHAgAJ) to make distribution an optional layer, but this topic is a work in progress. + +**Q: What happens to AppC or Docker Image Formats?** + +A: Existing formats can continue to be a proving ground for technologies, as needed. +The OCI Image Format project strives to provide a dependable open specification that can be shared between different tools and be evolved for years or decades of compatibility; as the deb and rpm format have. + +Find more [FAQ on the OCI site](https://www.opencontainers.org/faq). + +## Roadmap + +The [GitHub milestones](https://github.com/opencontainers/image-spec/milestones) lay out the path to the OCI v1.0.0 release in late 2016. + +# Contributing + +Development happens on GitHub for the spec. +Issues are used for bugs and actionable items and longer discussions can happen on the [mailing list](#mailing-list). + +The specification and code is licensed under the Apache 2.0 license found in the `LICENSE` file of this repository. + +## Discuss your design + +The project welcomes submissions, but please let everyone know what you are working on. + +Before undertaking a nontrivial change to this specification, send mail to the [mailing list](#mailing-list) to discuss what you plan to do. +This gives everyone a chance to validate the design, helps prevent duplication of effort, and ensures that the idea fits. +It also guarantees that the design is sound before code is written; a GitHub pull-request is not the place for high-level discussions. + +Typos and grammatical errors can go straight to a pull-request. +When in doubt, start on the [mailing-list](#mailing-list). + +## Weekly Call + +The contributors and maintainers of all OCI projects have a weekly meeting Wednesdays at 2:00 PM (USA Pacific). +Everyone is welcome to participate via [UberConference web][UberConference] or audio-only: +1-415-968-0849 (no PIN needed). +An initial agenda will be posted to the [mailing list](#mailing-list) earlier in the week, and everyone is welcome to propose additional topics or suggest other agenda alterations there. +Minutes are posted to the [mailing list](#mailing-list) and minutes from past calls are archived [here][minutes]. + +## Mailing List + +You can subscribe and join the mailing list on [Google Groups](https://groups.google.com/a/opencontainers.org/forum/#!forum/dev). + +## IRC + +OCI discussion happens on #opencontainers on Freenode ([logs][irc-logs]). + +## Markdown style + +To keep consistency throughout the Markdown files in the Open Container spec all files should be formatted one sentence per line. +This fixes two things: it makes diffing easier with git and it resolves fights about line wrapping length. +For example, this paragraph will span three lines in the Markdown source. + +## Git commit + +### Sign your work + +The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open-source patch. +The rules are pretty simple: if you can certify the below (from [developercertificate.org](http://developercertificate.org/)): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +then you just add a line to every git commit message: + + Signed-off-by: Joe Smith <joe@gmail.com> + +using your real name (sorry, no pseudonyms or anonymous contributions.) + +You can add the sign off when creating the git commit via `git commit -s`. + +### Commit Style + +Simple house-keeping for clean git history. +Read more on [How to Write a Git Commit Message](http://chris.beams.io/posts/git-commit/) or the Discussion section of [`git-commit(1)`](http://git-scm.com/docs/git-commit). + +1. Separate the subject from body with a blank line +2. Limit the subject line to 50 characters +3. Capitalize the subject line +4. Do not end the subject line with a period +5. Use the imperative mood in the subject line +6. Wrap the body at 72 characters +7. Use the body to explain what and why vs. how + * If there was important/useful/essential conversation or information, copy or include a reference +8. When possible, one keyword to scope the change in the subject (i.e. "README: ...", "runtime: ...") + + +[UberConference]: https://www.uberconference.com/opencontainers +[irc-logs]: http://ircbot.wl.linuxfoundation.org/eavesdrop/%23opencontainers/ +[minutes]: http://ircbot.wl.linuxfoundation.org/meetings/opencontainers/ diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/annotations.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/annotations.go new file mode 100644 index 000000000..35d810895 --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/annotations.go @@ -0,0 +1,56 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +const ( + // AnnotationCreated is the annotation key for the date and time on which the image was built (date-time string as defined by RFC 3339). + AnnotationCreated = "org.opencontainers.image.created" + + // AnnotationAuthors is the annotation key for the contact details of the people or organization responsible for the image (freeform string). + AnnotationAuthors = "org.opencontainers.image.authors" + + // AnnotationURL is the annotation key for the URL to find more information on the image. + AnnotationURL = "org.opencontainers.image.url" + + // AnnotationDocumentation is the annotation key for the URL to get documentation on the image. + AnnotationDocumentation = "org.opencontainers.image.documentation" + + // AnnotationSource is the annotation key for the URL to get source code for building the image. + AnnotationSource = "org.opencontainers.image.source" + + // AnnotationVersion is the annotation key for the version of the packaged software. + // The version MAY match a label or tag in the source code repository. + // The version MAY be Semantic versioning-compatible. + AnnotationVersion = "org.opencontainers.image.version" + + // AnnotationRevision is the annotation key for the source control revision identifier for the packaged software. + AnnotationRevision = "org.opencontainers.image.revision" + + // AnnotationVendor is the annotation key for the name of the distributing entity, organization or individual. + AnnotationVendor = "org.opencontainers.image.vendor" + + // AnnotationLicenses is the annotation key for the license(s) under which contained software is distributed as an SPDX License Expression. + AnnotationLicenses = "org.opencontainers.image.licenses" + + // AnnotationRefName is the annotation key for the name of the reference for a target. + // SHOULD only be considered valid when on descriptors on `index.json` within image layout. + AnnotationRefName = "org.opencontainers.image.ref.name" + + // AnnotationTitle is the annotation key for the human-readable title of the image. + AnnotationTitle = "org.opencontainers.image.title" + + // AnnotationDescription is the annotation key for the human-readable description of the software packaged in the image. + AnnotationDescription = "org.opencontainers.image.description" +) diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/config.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/config.go new file mode 100644 index 000000000..fe799bd69 --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/config.go @@ -0,0 +1,103 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "time" + + digest "github.com/opencontainers/go-digest" +) + +// ImageConfig defines the execution parameters which should be used as a base when running a container using an image. +type ImageConfig struct { + // User defines the username or UID which the process in the container should run as. + User string `json:"User,omitempty"` + + // ExposedPorts a set of ports to expose from a container running this image. + ExposedPorts map[string]struct{} `json:"ExposedPorts,omitempty"` + + // Env is a list of environment variables to be used in a container. + Env []string `json:"Env,omitempty"` + + // Entrypoint defines a list of arguments to use as the command to execute when the container starts. + Entrypoint []string `json:"Entrypoint,omitempty"` + + // Cmd defines the default arguments to the entrypoint of the container. + Cmd []string `json:"Cmd,omitempty"` + + // Volumes is a set of directories describing where the process is likely write data specific to a container instance. + Volumes map[string]struct{} `json:"Volumes,omitempty"` + + // WorkingDir sets the current working directory of the entrypoint process in the container. + WorkingDir string `json:"WorkingDir,omitempty"` + + // Labels contains arbitrary metadata for the container. + Labels map[string]string `json:"Labels,omitempty"` + + // StopSignal contains the system call signal that will be sent to the container to exit. + StopSignal string `json:"StopSignal,omitempty"` +} + +// RootFS describes a layer content addresses +type RootFS struct { + // Type is the type of the rootfs. + Type string `json:"type"` + + // DiffIDs is an array of layer content hashes (DiffIDs), in order from bottom-most to top-most. + DiffIDs []digest.Digest `json:"diff_ids"` +} + +// History describes the history of a layer. +type History struct { + // Created is the combined date and time at which the layer was created, formatted as defined by RFC 3339, section 5.6. + Created *time.Time `json:"created,omitempty"` + + // CreatedBy is the command which created the layer. + CreatedBy string `json:"created_by,omitempty"` + + // Author is the author of the build point. + Author string `json:"author,omitempty"` + + // Comment is a custom message set when creating the layer. + Comment string `json:"comment,omitempty"` + + // EmptyLayer is used to mark if the history item created a filesystem diff. + EmptyLayer bool `json:"empty_layer,omitempty"` +} + +// Image is the JSON structure which describes some basic information about the image. +// This provides the `application/vnd.oci.image.config.v1+json` mediatype when marshalled to JSON. +type Image struct { + // Created is the combined date and time at which the image was created, formatted as defined by RFC 3339, section 5.6. + Created *time.Time `json:"created,omitempty"` + + // Author defines the name and/or email address of the person or entity which created and is responsible for maintaining the image. + Author string `json:"author,omitempty"` + + // Architecture is the CPU architecture which the binaries in this image are built to run on. + Architecture string `json:"architecture"` + + // OS is the name of the operating system which the image is built to run on. + OS string `json:"os"` + + // Config defines the execution parameters which should be used as a base when running a container using the image. + Config ImageConfig `json:"config,omitempty"` + + // RootFS references the layer content addresses used by the image. + RootFS RootFS `json:"rootfs"` + + // History describes the history of each layer. + History []History `json:"history,omitempty"` +} diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/descriptor.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/descriptor.go new file mode 100644 index 000000000..6e442a085 --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/descriptor.go @@ -0,0 +1,64 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import digest "github.com/opencontainers/go-digest" + +// Descriptor describes the disposition of targeted content. +// This structure provides `application/vnd.oci.descriptor.v1+json` mediatype +// when marshalled to JSON. +type Descriptor struct { + // MediaType is the media type of the object this schema refers to. + MediaType string `json:"mediaType,omitempty"` + + // Digest is the digest of the targeted content. + Digest digest.Digest `json:"digest"` + + // Size specifies the size in bytes of the blob. + Size int64 `json:"size"` + + // URLs specifies a list of URLs from which this object MAY be downloaded + URLs []string `json:"urls,omitempty"` + + // Annotations contains arbitrary metadata relating to the targeted content. + Annotations map[string]string `json:"annotations,omitempty"` + + // Platform describes the platform which the image in the manifest runs on. + // + // This should only be used when referring to a manifest. + Platform *Platform `json:"platform,omitempty"` +} + +// Platform describes the platform which the image in the manifest runs on. +type Platform struct { + // Architecture field specifies the CPU architecture, for example + // `amd64` or `ppc64`. + Architecture string `json:"architecture"` + + // OS specifies the operating system, for example `linux` or `windows`. + OS string `json:"os"` + + // OSVersion is an optional field specifying the operating system + // version, for example on Windows `10.0.14393.1066`. + OSVersion string `json:"os.version,omitempty"` + + // OSFeatures is an optional field specifying an array of strings, + // each listing a required OS feature (for example on Windows `win32k`). + OSFeatures []string `json:"os.features,omitempty"` + + // Variant is an optional field specifying a variant of the CPU, for + // example `v7` to specify ARMv7 when architecture is `arm`. + Variant string `json:"variant,omitempty"` +} diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/index.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/index.go new file mode 100644 index 000000000..4e6c4b236 --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/index.go @@ -0,0 +1,29 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import "github.com/opencontainers/image-spec/specs-go" + +// Index references manifests for various platforms. +// This structure provides `application/vnd.oci.image.index.v1+json` mediatype when marshalled to JSON. +type Index struct { + specs.Versioned + + // Manifests references platform specific manifests. + Manifests []Descriptor `json:"manifests"` + + // Annotations contains arbitrary metadata for the image index. + Annotations map[string]string `json:"annotations,omitempty"` +} diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/layout.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/layout.go new file mode 100644 index 000000000..fc79e9e0d --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/layout.go @@ -0,0 +1,28 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +const ( + // ImageLayoutFile is the file name of oci image layout file + ImageLayoutFile = "oci-layout" + // ImageLayoutVersion is the version of ImageLayout + ImageLayoutVersion = "1.0.0" +) + +// ImageLayout is the structure in the "oci-layout" file, found in the root +// of an OCI Image-layout directory. +type ImageLayout struct { + Version string `json:"imageLayoutVersion"` +} diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/manifest.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/manifest.go new file mode 100644 index 000000000..7ff32c40b --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/manifest.go @@ -0,0 +1,32 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import "github.com/opencontainers/image-spec/specs-go" + +// Manifest provides `application/vnd.oci.image.manifest.v1+json` mediatype structure when marshalled to JSON. +type Manifest struct { + specs.Versioned + + // Config references a configuration object for a container, by digest. + // The referenced configuration object is a JSON blob that the runtime uses to set up the container. + Config Descriptor `json:"config"` + + // Layers is an indexed list of layers referenced by the manifest. + Layers []Descriptor `json:"layers"` + + // Annotations contains arbitrary metadata for the image manifest. + Annotations map[string]string `json:"annotations,omitempty"` +} diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/mediatype.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/mediatype.go new file mode 100644 index 000000000..bad7bb97f --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/mediatype.go @@ -0,0 +1,48 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +const ( + // MediaTypeDescriptor specifies the media type for a content descriptor. + MediaTypeDescriptor = "application/vnd.oci.descriptor.v1+json" + + // MediaTypeLayoutHeader specifies the media type for the oci-layout. + MediaTypeLayoutHeader = "application/vnd.oci.layout.header.v1+json" + + // MediaTypeImageManifest specifies the media type for an image manifest. + MediaTypeImageManifest = "application/vnd.oci.image.manifest.v1+json" + + // MediaTypeImageIndex specifies the media type for an image index. + MediaTypeImageIndex = "application/vnd.oci.image.index.v1+json" + + // MediaTypeImageLayer is the media type used for layers referenced by the manifest. + MediaTypeImageLayer = "application/vnd.oci.image.layer.v1.tar" + + // MediaTypeImageLayerGzip is the media type used for gzipped layers + // referenced by the manifest. + MediaTypeImageLayerGzip = "application/vnd.oci.image.layer.v1.tar+gzip" + + // MediaTypeImageLayerNonDistributable is the media type for layers referenced by + // the manifest but with distribution restrictions. + MediaTypeImageLayerNonDistributable = "application/vnd.oci.image.layer.nondistributable.v1.tar" + + // MediaTypeImageLayerNonDistributableGzip is the media type for + // gzipped layers referenced by the manifest but with distribution + // restrictions. + MediaTypeImageLayerNonDistributableGzip = "application/vnd.oci.image.layer.nondistributable.v1.tar+gzip" + + // MediaTypeImageConfig specifies the media type for the image configuration. + MediaTypeImageConfig = "application/vnd.oci.image.config.v1+json" +) diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/version.go b/vendor/github.com/opencontainers/image-spec/specs-go/version.go new file mode 100644 index 000000000..e3eee29b4 --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/version.go @@ -0,0 +1,32 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package specs + +import "fmt" + +const ( + // VersionMajor is for an API incompatible changes + VersionMajor = 1 + // VersionMinor is for functionality in a backwards-compatible manner + VersionMinor = 0 + // VersionPatch is for backwards-compatible bug fixes + VersionPatch = 0 + + // VersionDev indicates development branch. Releases will be empty string. + VersionDev = "" +) + +// Version is the specification version that the package types support. +var Version = fmt.Sprintf("%d.%d.%d%s", VersionMajor, VersionMinor, VersionPatch, VersionDev) diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/versioned.go b/vendor/github.com/opencontainers/image-spec/specs-go/versioned.go new file mode 100644 index 000000000..58a1510f3 --- /dev/null +++ b/vendor/github.com/opencontainers/image-spec/specs-go/versioned.go @@ -0,0 +1,23 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package specs + +// Versioned provides a struct with the manifest schemaVersion and mediaType. +// Incoming content with unknown schema version can be decoded against this +// struct to check the version. +type Versioned struct { + // SchemaVersion is the image manifest schema that this image follows + SchemaVersion int `json:"schemaVersion"` +} diff --git a/vendor/github.com/opencontainers/runc/LICENSE b/vendor/github.com/opencontainers/runc/LICENSE new file mode 100644 index 000000000..27448585a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2014 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/runc/NOTICE b/vendor/github.com/opencontainers/runc/NOTICE new file mode 100644 index 000000000..5c97abce4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/NOTICE @@ -0,0 +1,17 @@ +runc + +Copyright 2012-2015 Docker, Inc. + +This product includes software developed at Docker, Inc. (http://www.docker.com). + +The following is courtesy of our legal counsel: + + +Use and transfer of Docker may be subject to certain restrictions by the +United States and other governments. +It is your responsibility to ensure that your use and/or transfer does not +violate applicable laws. + +For more information, please see http://www.bis.doc.gov + +See also http://www.apache.org/dev/crypto.html and/or seek legal counsel. diff --git a/vendor/github.com/opencontainers/runc/README.md b/vendor/github.com/opencontainers/runc/README.md new file mode 100644 index 000000000..eabfb982b --- /dev/null +++ b/vendor/github.com/opencontainers/runc/README.md @@ -0,0 +1,244 @@ +# runc + +[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc) +[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc) +[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc) + +## Introduction + +`runc` is a CLI tool for spawning and running containers according to the OCI specification. + +## Releases + +`runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository. +We will try to make sure that `runc` and the OCI specification major versions stay in lockstep. +This means that `runc` 1.0.0 should implement the 1.0 version of the specification. + +You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page. + +### Security + +If you wish to report a security issue, please disclose the issue responsibly +to security@opencontainers.org. + +## Building + +`runc` currently supports the Linux platform with various architecture support. +It must be built with Go version 1.6 or higher in order for some features to function properly. + +In order to enable seccomp support you will need to install `libseccomp` on your platform. +> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu + +Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make. + +```bash +# create a 'github.com/opencontainers' in your GOPATH/src +cd github.com/opencontainers +git clone https://github.com/opencontainers/runc +cd runc + +make +sudo make install +``` + +`runc` will be installed to `/usr/local/sbin/runc` on your system. + +#### Build Tags + +`runc` supports optional build tags for compiling support of various features. +To add build tags to the make option the `BUILDTAGS` variable must be set. + +```bash +make BUILDTAGS='seccomp apparmor' +``` + +| Build Tag | Feature | Dependency | +|-----------|------------------------------------|-------------| +| seccomp | Syscall filtering | libseccomp | +| selinux | selinux process and mount labeling | <none> | +| apparmor | apparmor profile support | libapparmor | +| ambient | ambient capability support | kernel 4.3 | + + +### Running the test suite + +`runc` currently supports running its test suite via Docker. +To run the suite just type `make test`. + +```bash +make test +``` + +There are additional make targets for running the tests outside of a container but this is not recommended as the tests are written with the expectation that they can write and remove anywhere. + +You can run a specific test case by setting the `TESTFLAGS` variable. + +```bash +# make test TESTFLAGS="-run=SomeTestFunction" +``` + +### Dependencies Management + +`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management. +Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update +new dependencies. + +## Using runc + +### Creating an OCI Bundle + +In order to use runc you must have your container in the format of an OCI bundle. +If you have Docker installed you can use its `export` method to acquire a root filesystem from an existing Docker container. + +```bash +# create the top most bundle directory +mkdir /mycontainer +cd /mycontainer + +# create the rootfs directory +mkdir rootfs + +# export busybox via Docker into the rootfs directory +docker export $(docker create busybox) | tar -C rootfs -xvf - +``` + +After a root filesystem is populated you just generate a spec in the format of a `config.json` file inside your bundle. +`runc` provides a `spec` command to generate a base template spec that you are then able to edit. +To find features and documentation for fields in the spec please refer to the [specs](https://github.com/opencontainers/runtime-spec) repository. + +```bash +runc spec +``` + +### Running Containers + +Assuming you have an OCI bundle from the previous step you can execute the container in two different ways. + +The first way is to use the convenience command `run` that will handle creating, starting, and deleting the container after it exits. + +```bash +# run as root +cd /mycontainer +runc run mycontainerid +``` + +If you used the unmodified `runc spec` template this should give you a `sh` session inside the container. + +The second way to start a container is using the specs lifecycle operations. +This gives you more power over how the container is created and managed while it is running. +This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here. +Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`. + + +```json + "process": { + "terminal": false, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ + "sleep", "5" + ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/", + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "inheritable": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "ambient": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ] + }, + "rlimits": [ + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ], + "noNewPrivileges": true + }, +``` + +Now we can go through the lifecycle operations in your shell. + + +```bash +# run as root +cd /mycontainer +runc create mycontainerid + +# view the container is created and in the "created" state +runc list + +# start the process inside the container +runc start mycontainerid + +# after 5 seconds view that the container has exited and is now in the stopped state +runc list + +# now delete the container +runc delete mycontainerid +``` + +This adds more complexity but allows higher level systems to manage runc and provides points in the containers creation to setup various settings after the container has created and/or before it is deleted. +This is commonly used to setup the container's network stack after `create` but before `start` where the user's defined process will be running. + +#### Rootless containers +`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. Run the following commands as an ordinary user: +```bash +# Same as the first example +mkdir ~/mycontainer +cd ~/mycontainer +mkdir rootfs +docker export $(docker create busybox) | tar -C rootfs -xvf - + +# The --rootless parameter instructs runc spec to generate a configuration for a rootless container, which will allow you to run the container as a non-root user. +runc spec --rootless + +# The --root parameter tells runc where to store the container state. It must be writable by the user. +runc --root /tmp/runc run mycontainerid +``` + +#### Supervisors + +`runc` can be used with process supervisors and init systems to ensure that containers are restarted when they exit. +An example systemd unit file looks something like this. + +```systemd +[Unit] +Description=Start My Container + +[Service] +Type=forking +ExecStart=/usr/local/sbin/runc run -d --pid-file /run/mycontainerid.pid mycontainerid +ExecStopPost=/usr/local/sbin/runc delete mycontainerid +WorkingDirectory=/mycontainer +PIDFile=/run/mycontainerid.pid + +[Install] +WantedBy=multi-user.target +``` diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md new file mode 100644 index 000000000..42f3efe56 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md @@ -0,0 +1,328 @@ +# libcontainer + +[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer) + +Libcontainer provides a native Go implementation for creating containers +with namespaces, cgroups, capabilities, and filesystem access controls. +It allows you to manage the lifecycle of the container performing additional operations +after the container is created. + + +#### Container +A container is a self contained execution environment that shares the kernel of the +host system and which is (optionally) isolated from other containers in the system. + +#### Using libcontainer + +Because containers are spawned in a two step process you will need a binary that +will be executed as the init process for the container. In libcontainer, we use +the current binary (/proc/self/exe) to be executed as the init process, and use +arg "init", we call the first step process "bootstrap", so you always need a "init" +function as the entry of "bootstrap". + +In addition to the go init function the early stage bootstrap is handled by importing +[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md). + +```go +import ( + _ "github.com/opencontainers/runc/libcontainer/nsenter" +) + +func init() { + if len(os.Args) > 1 && os.Args[1] == "init" { + runtime.GOMAXPROCS(1) + runtime.LockOSThread() + factory, _ := libcontainer.New("") + if err := factory.StartInitialization(); err != nil { + logrus.Fatal(err) + } + panic("--this line should have never been executed, congratulations--") + } +} +``` + +Then to create a container you first have to initialize an instance of a factory +that will handle the creation and initialization for a container. + +```go +factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init")) +if err != nil { + logrus.Fatal(err) + return +} +``` + +Once you have an instance of the factory created we can create a configuration +struct describing how the container is to be created. A sample would look similar to this: + +```go +defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV +config := &configs.Config{ + Rootfs: "/your/path/to/rootfs", + Capabilities: &configs.Capabilities{ + Bounding: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Effective: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Inheritable: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Permitted: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Ambient: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + }, + Namespaces: configs.Namespaces([]configs.Namespace{ + {Type: configs.NEWNS}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + {Type: configs.NEWPID}, + {Type: configs.NEWUSER}, + {Type: configs.NEWNET}, + }), + Cgroups: &configs.Cgroup{ + Name: "test-container", + Parent: "system", + Resources: &configs.Resources{ + MemorySwappiness: nil, + AllowAllDevices: nil, + AllowedDevices: configs.DefaultAllowedDevices, + }, + }, + MaskPaths: []string{ + "/proc/kcore", + "/sys/firmware", + }, + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, + Devices: configs.DefaultAutoCreatedDevices, + Hostname: "testing", + Mounts: []*configs.Mount{ + { + Source: "proc", + Destination: "/proc", + Device: "proc", + Flags: defaultMountFlags, + }, + { + Source: "tmpfs", + Destination: "/dev", + Device: "tmpfs", + Flags: unix.MS_NOSUID | unix.MS_STRICTATIME, + Data: "mode=755", + }, + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + Flags: unix.MS_NOSUID | unix.MS_NOEXEC, + Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", + }, + { + Device: "tmpfs", + Source: "shm", + Destination: "/dev/shm", + Data: "mode=1777,size=65536k", + Flags: defaultMountFlags, + }, + { + Source: "mqueue", + Destination: "/dev/mqueue", + Device: "mqueue", + Flags: defaultMountFlags, + }, + { + Source: "sysfs", + Destination: "/sys", + Device: "sysfs", + Flags: defaultMountFlags | unix.MS_RDONLY, + }, + }, + UidMappings: []configs.IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 65536, + }, + }, + GidMappings: []configs.IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 65536, + }, + }, + Networks: []*configs.Network{ + { + Type: "loopback", + Address: "127.0.0.1/0", + Gateway: "localhost", + }, + }, + Rlimits: []configs.Rlimit{ + { + Type: unix.RLIMIT_NOFILE, + Hard: uint64(1025), + Soft: uint64(1025), + }, + }, +} +``` + +Once you have the configuration populated you can create a container: + +```go +container, err := factory.Create("container-id", config) +if err != nil { + logrus.Fatal(err) + return +} +``` + +To spawn bash as the initial process inside the container and have the +processes pid returned in order to wait, signal, or kill the process: + +```go +process := &libcontainer.Process{ + Args: []string{"/bin/bash"}, + Env: []string{"PATH=/bin"}, + User: "daemon", + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, +} + +err := container.Run(process) +if err != nil { + container.Destroy() + logrus.Fatal(err) + return +} + +// wait for the process to finish. +_, err := process.Wait() +if err != nil { + logrus.Fatal(err) +} + +// destroy the container. +container.Destroy() +``` + +Additional ways to interact with a running container are: + +```go +// return all the pids for all processes running inside the container. +processes, err := container.Processes() + +// get detailed cpu, memory, io, and network statistics for the container and +// it's processes. +stats, err := container.Stats() + +// pause all processes inside the container. +container.Pause() + +// resume all paused processes. +container.Resume() + +// send signal to container's init process. +container.Signal(signal) + +// update container resource constraints. +container.Set(config) + +// get current status of the container. +status, err := container.Status() + +// get current container's state information. +state, err := container.State() +``` + + +#### Checkpoint & Restore + +libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers. +This let's you save the state of a process running inside a container to disk, and then restore +that state into a new process, on the same machine or on another machine. + +`criu` version 1.5.2 or higher is required to use checkpoint and restore. +If you don't already have `criu` installed, you can build it from source, following the +[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image +generated when building libcontainer with docker. + + +## Copyright and license + +Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license. +Docs released under Creative commons. + diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go new file mode 100644 index 000000000..82ed1a68a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go @@ -0,0 +1,39 @@ +// +build apparmor,linux + +package apparmor + +// #cgo LDFLAGS: -lapparmor +// #include <sys/apparmor.h> +// #include <stdlib.h> +import "C" +import ( + "fmt" + "io/ioutil" + "os" + "unsafe" +) + +// IsEnabled returns true if apparmor is enabled for the host. +func IsEnabled() bool { + if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" { + if _, err = os.Stat("/sbin/apparmor_parser"); err == nil { + buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") + return err == nil && len(buf) > 1 && buf[0] == 'Y' + } + } + return false +} + +// ApplyProfile will apply the profile with the specified name to the process after +// the next exec. +func ApplyProfile(name string) error { + if name == "" { + return nil + } + cName := C.CString(name) + defer C.free(unsafe.Pointer(cName)) + if _, err := C.aa_change_onexec(cName); err != nil { + return fmt.Errorf("apparmor failed to apply profile: %s", err) + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go new file mode 100644 index 000000000..d4110cf0b --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go @@ -0,0 +1,20 @@ +// +build !apparmor !linux + +package apparmor + +import ( + "errors" +) + +var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported") + +func IsEnabled() bool { + return false +} + +func ApplyProfile(name string) error { + if name != "" { + return ErrApparmorNotEnabled + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go new file mode 100644 index 000000000..8981b2a2f --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go @@ -0,0 +1,114 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "os" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/syndtr/gocapability/capability" +) + +const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS + +var capabilityMap map[string]capability.Cap + +func init() { + capabilityMap = make(map[string]capability.Cap) + last := capability.CAP_LAST_CAP + // workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap + if last == capability.Cap(63) { + last = capability.CAP_BLOCK_SUSPEND + } + for _, cap := range capability.List() { + if cap > last { + continue + } + capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String())) + capabilityMap[capKey] = cap + } +} + +func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) { + bounding := []capability.Cap{} + for _, c := range capConfig.Bounding { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + bounding = append(bounding, v) + } + effective := []capability.Cap{} + for _, c := range capConfig.Effective { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + effective = append(effective, v) + } + inheritable := []capability.Cap{} + for _, c := range capConfig.Inheritable { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + inheritable = append(inheritable, v) + } + permitted := []capability.Cap{} + for _, c := range capConfig.Permitted { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + permitted = append(permitted, v) + } + ambient := []capability.Cap{} + for _, c := range capConfig.Ambient { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + ambient = append(ambient, v) + } + pid, err := capability.NewPid(os.Getpid()) + if err != nil { + return nil, err + } + return &containerCapabilities{ + bounding: bounding, + effective: effective, + inheritable: inheritable, + permitted: permitted, + ambient: ambient, + pid: pid, + }, nil +} + +type containerCapabilities struct { + pid capability.Capabilities + bounding []capability.Cap + effective []capability.Cap + inheritable []capability.Cap + permitted []capability.Cap + ambient []capability.Cap +} + +// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist. +func (c *containerCapabilities) ApplyBoundingSet() error { + c.pid.Clear(capability.BOUNDS) + c.pid.Set(capability.BOUNDS, c.bounding...) + return c.pid.Apply(capability.BOUNDS) +} + +// Apply sets all the capabilities for the current process in the config. +func (c *containerCapabilities) ApplyCaps() error { + c.pid.Clear(allCapabilityTypes) + c.pid.Set(capability.BOUNDS, c.bounding...) + c.pid.Set(capability.PERMITTED, c.permitted...) + c.pid.Set(capability.INHERITABLE, c.inheritable...) + c.pid.Set(capability.EFFECTIVE, c.effective...) + c.pid.Set(capability.AMBIENT, c.ambient...) + return c.pid.Apply(allCapabilityTypes) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go new file mode 100644 index 000000000..25ff51589 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go @@ -0,0 +1,64 @@ +// +build linux + +package cgroups + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager interface { + // Applies cgroup configuration to the process with the specified pid + Apply(pid int) error + + // Returns the PIDs inside the cgroup set + GetPids() ([]int, error) + + // Returns the PIDs inside the cgroup set & all sub-cgroups + GetAllPids() ([]int, error) + + // Returns statistics for the cgroup set + GetStats() (*Stats, error) + + // Toggles the freezer cgroup according with specified state + Freeze(state configs.FreezerState) error + + // Destroys the cgroup set + Destroy() error + + // The option func SystemdCgroups() and Cgroupfs() require following attributes: + // Paths map[string]string + // Cgroups *configs.Cgroup + // Paths maps cgroup subsystem to path at which it is mounted. + // Cgroups specifies specific cgroup settings for the various subsystems + + // Returns cgroup paths to save in a state file and to be able to + // restore the object later. + GetPaths() map[string]string + + // Sets the cgroup as configured. + Set(container *configs.Config) error +} + +type NotFoundError struct { + Subsystem string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) +} + +func NewNotFoundError(sub string) error { + return &NotFoundError{ + Subsystem: sub, + } +} + +func IsNotFound(err error) bool { + if err == nil { + return false + } + _, ok := err.(*NotFoundError) + return ok +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go new file mode 100644 index 000000000..278d507e2 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package cgroups diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go new file mode 100644 index 000000000..22d82acb4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go @@ -0,0 +1,360 @@ +// +build linux + +package fs + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sync" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" +) + +var ( + subsystems = subsystemSet{ + &CpusetGroup{}, + &DevicesGroup{}, + &MemoryGroup{}, + &CpuGroup{}, + &CpuacctGroup{}, + &PidsGroup{}, + &BlkioGroup{}, + &HugetlbGroup{}, + &NetClsGroup{}, + &NetPrioGroup{}, + &PerfEventGroup{}, + &FreezerGroup{}, + &NameGroup{GroupName: "name=systemd", Join: true}, + } + HugePageSizes, _ = cgroups.GetHugePageSize() +) + +var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") + +type subsystemSet []subsystem + +func (s subsystemSet) Get(name string) (subsystem, error) { + for _, ss := range s { + if ss.Name() == name { + return ss, nil + } + } + return nil, errSubsystemDoesNotExist +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'cgroupData'. + Remove(*cgroupData) error + // Creates and joins the cgroup represented by 'cgroupData'. + Apply(*cgroupData) error + // Set the cgroup represented by cgroup. + Set(path string, cgroup *configs.Cgroup) error +} + +type Manager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +// The absolute path to the root of the cgroup hierarchies. +var cgroupRootLock sync.Mutex +var cgroupRoot string + +// Gets the cgroupRoot. +func getCgroupRoot() (string, error) { + cgroupRootLock.Lock() + defer cgroupRootLock.Unlock() + + if cgroupRoot != "" { + return cgroupRoot, nil + } + + root, err := cgroups.FindCgroupMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + cgroupRoot = root + return cgroupRoot, nil +} + +type cgroupData struct { + root string + innerPath string + config *configs.Cgroup + pid int +} + +func (m *Manager) Apply(pid int) (err error) { + if m.Cgroups == nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + + var c = m.Cgroups + + d, err := getCgroupData(m.Cgroups, pid) + if err != nil { + return err + } + + m.Paths = make(map[string]string) + if c.Paths != nil { + for name, path := range c.Paths { + _, err := d.path(name) + if err != nil { + if cgroups.IsNotFound(err) { + continue + } + return err + } + m.Paths[name] = path + } + return cgroups.EnterPid(m.Paths, pid) + } + + for _, sys := range subsystems { + // TODO: Apply should, ideally, be reentrant or be broken up into a separate + // create and join phase so that the cgroup hierarchy for a container can be + // created then join consists of writing the process pids to cgroup.procs + p, err := d.path(sys.Name()) + if err != nil { + // The non-presence of the devices subsystem is + // considered fatal for security reasons. + if cgroups.IsNotFound(err) && sys.Name() != "devices" { + continue + } + return err + } + m.Paths[sys.Name()] = p + + if err := sys.Apply(d); err != nil { + return err + } + } + return nil +} + +func (m *Manager) Destroy() error { + if m.Cgroups.Paths != nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, err := subsystems.Get(name) + if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + return stats, nil +} + +func (m *Manager) Set(container *configs.Config) error { + // If Paths are set, then we are just joining cgroups paths + // and there is no need to set any values. + if m.Cgroups.Paths != nil { + return nil + } + + paths := m.GetPaths() + for _, sys := range subsystems { + path := paths[sys.Name()] + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + if m.Paths["cpu"] != "" { + if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { + return err + } + } + return nil +} + +// Freeze toggles the container's freezer cgroup depending on the state +// provided +func (m *Manager) Freeze(state configs.FreezerState) error { + paths := m.GetPaths() + dir := paths["freezer"] + prevState := m.Cgroups.Resources.Freezer + m.Cgroups.Resources.Freezer = state + freezer, err := subsystems.Get("freezer") + if err != nil { + return err + } + err = freezer.Set(dir, m.Cgroups) + if err != nil { + m.Cgroups.Resources.Freezer = prevState + return err + } + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + paths := m.GetPaths() + return cgroups.GetPids(paths["devices"]) +} + +func (m *Manager) GetAllPids() ([]int, error) { + paths := m.GetPaths() + return cgroups.GetAllPids(paths["devices"]) +} + +func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { + root, err := getCgroupRoot() + if err != nil { + return nil, err + } + + if (c.Name != "" || c.Parent != "") && c.Path != "" { + return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used") + } + + // XXX: Do not remove this code. Path safety is important! -- cyphar + cgPath := libcontainerUtils.CleanPath(c.Path) + cgParent := libcontainerUtils.CleanPath(c.Parent) + cgName := libcontainerUtils.CleanPath(c.Name) + + innerPath := cgPath + if innerPath == "" { + innerPath = filepath.Join(cgParent, cgName) + } + + return &cgroupData{ + root: root, + innerPath: innerPath, + config: c, + pid: pid, + }, nil +} + +func (raw *cgroupData) path(subsystem string) (string, error) { + mnt, err := cgroups.FindCgroupMountpoint(subsystem) + // If we didn't mount the subsystem, there is no point we make the path. + if err != nil { + return "", err + } + + // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. + if filepath.IsAbs(raw.innerPath) { + // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. + return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil + } + + // Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating + // process could in container and shared pid namespace with host, and + // /proc/1/cgroup could point to whole other world of cgroups. + parentPath, err := cgroups.GetOwnCgroupPath(subsystem) + if err != nil { + return "", err + } + + return filepath.Join(parentPath, raw.innerPath), nil +} + +func (raw *cgroupData) join(subsystem string) (string, error) { + path, err := raw.path(subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil { + return "", err + } + return path, nil +} + +func writeFile(dir, file, data string) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. + if dir == "" { + return fmt.Errorf("no such directory for %s", file) + } + if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", data, file, err) + } + return nil +} + +func readFile(dir, file string) (string, error) { + data, err := ioutil.ReadFile(filepath.Join(dir, file)) + return string(data), err +} + +func removePath(p string, err error) error { + if err != nil { + return err + } + if p != "" { + return os.RemoveAll(p) + } + return nil +} + +func CheckCpushares(path string, c uint64) error { + var cpuShares uint64 + + if c == 0 { + return nil + } + + fd, err := os.Open(filepath.Join(path, "cpu.shares")) + if err != nil { + return err + } + defer fd.Close() + + _, err = fmt.Fscanf(fd, "%d", &cpuShares) + if err != nil && err != io.EOF { + return err + } + + if c > cpuShares { + return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares) + } else if c < cpuShares { + return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares) + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go new file mode 100644 index 000000000..a142cb991 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go @@ -0,0 +1,237 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type BlkioGroup struct { +} + +func (s *BlkioGroup) Name() string { + return "blkio" +} + +func (s *BlkioGroup) Apply(d *cgroupData) error { + _, err := d.join("blkio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.BlkioWeight != 0 { + if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil { + return err + } + } + + if cgroup.Resources.BlkioLeafWeight != 0 { + if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil { + return err + } + } + for _, wd := range cgroup.Resources.BlkioWeightDevice { + if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice { + if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice { + if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice { + if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice { + if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { + return err + } + } + + return nil +} + +func (s *BlkioGroup) Remove(d *cgroupData) error { + return removePath(d.path("blkio")) +} + +/* +examples: + + blkio.sectors + 8:0 6792 + + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 + + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 + + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ + +func splitBlkioStatLine(r rune) bool { + return r == ' ' || r == ':' +} + +func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) { + var blkioStats []cgroups.BlkioStatEntry + f, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return blkioStats, nil + } + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + // format: dev type amount + fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) + if len(fields) < 3 { + if len(fields) == 2 && fields[0] == "Total" { + // skip total line + continue + } else { + return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) + } + } + + v, err := strconv.ParseUint(fields[0], 10, 64) + if err != nil { + return nil, err + } + major := v + + v, err = strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return nil, err + } + minor := v + + op := "" + valueField := 2 + if len(fields) == 4 { + op = fields[2] + valueField = 3 + } + v, err = strconv.ParseUint(fields[valueField], 10, 64) + if err != nil { + return nil, err + } + blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) + } + + return blkioStats, nil +} + +func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { + // Try to read CFQ stats available on all CFQ enabled kernels first + if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil { + return getCFQStats(path, stats) + } + return getStats(path, stats) // Use generic stats as fallback +} + +func getCFQStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil { + return err + } + stats.BlkioStats.SectorsRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil { + return err + } + stats.BlkioStats.IoQueuedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoWaitTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil { + return err + } + stats.BlkioStats.IoMergedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoTimeRecursive = blkioStats + + return nil +} + +func getStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil { + return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go new file mode 100644 index 000000000..b712bd0b1 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go @@ -0,0 +1,125 @@ +// +build linux + +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type CpuGroup struct { +} + +func (s *CpuGroup) Name() string { + return "cpu" +} + +func (s *CpuGroup) Apply(d *cgroupData) error { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + path, err := d.path("cpu") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(path, d.config, d.pid) +} + +func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpu cgroup mounted. + // Just do nothing and don't fail. + if path == "" { + return nil + } + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + // We should set the real-Time group scheduling settings before moving + // in the process because if the process is already in SCHED_RR mode + // and no RT bandwidth is set, adding it will fail. + if err := s.SetRtSched(path, cgroup); err != nil { + return err + } + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := cgroups.WriteCgroupProc(path, pid); err != nil { + return err + } + + return nil +} + +func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuRtPeriod != 0 { + if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuRtRuntime != 0 { + if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil { + return err + } + } + return nil +} + +func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuShares != 0 { + if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuPeriod != 0 { + if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuQuota != 0 { + if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil { + return err + } + } + if err := s.SetRtSched(path, cgroup); err != nil { + return err + } + + return nil +} + +func (s *CpuGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpu")) +} + +func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { + f, err := os.Open(filepath.Join(path, "cpu.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return err + } + switch t { + case "nr_periods": + stats.CpuStats.ThrottlingData.Periods = v + + case "nr_throttled": + stats.CpuStats.ThrottlingData.ThrottledPeriods = v + + case "throttled_time": + stats.CpuStats.ThrottlingData.ThrottledTime = v + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go new file mode 100644 index 000000000..53afbaddf --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go @@ -0,0 +1,121 @@ +// +build linux + +package fs + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +const ( + cgroupCpuacctStat = "cpuacct.stat" + nanosecondsInSecond = 1000000000 +) + +var clockTicks = uint64(system.GetClockTicks()) + +type CpuacctGroup struct { +} + +func (s *CpuacctGroup) Name() string { + return "cpuacct" +} + +func (s *CpuacctGroup) Apply(d *cgroupData) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) { + return err + } + + return nil +} + +func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *CpuacctGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpuacct")) +} + +func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { + userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) + if err != nil { + return err + } + + totalUsage, err := getCgroupParamUint(path, "cpuacct.usage") + if err != nil { + return err + } + + percpuUsage, err := getPercpuUsage(path) + if err != nil { + return err + } + + stats.CpuStats.CpuUsage.TotalUsage = totalUsage + stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage + stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage + stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage + return nil +} + +// Returns user and kernel usage breakdown in nanoseconds. +func getCpuUsageBreakdown(path string) (uint64, uint64, error) { + userModeUsage := uint64(0) + kernelModeUsage := uint64(0) + const ( + userField = "user" + systemField = "system" + ) + + // Expected format: + // user <usage in ticks> + // system <usage in ticks> + data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat)) + if err != nil { + return 0, 0, err + } + fields := strings.Fields(string(data)) + if len(fields) != 4 { + return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat)) + } + if fields[0] != userField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField) + } + if fields[2] != systemField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField) + } + if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { + return 0, 0, err + } + if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { + return 0, 0, err + } + + return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil +} + +func getPercpuUsage(path string) ([]uint64, error) { + percpuUsage := []uint64{} + data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu")) + if err != nil { + return percpuUsage, err + } + for _, value := range strings.Fields(string(data)) { + value, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) + } + percpuUsage = append(percpuUsage, value) + } + return percpuUsage, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go new file mode 100644 index 000000000..20c9eafac --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go @@ -0,0 +1,163 @@ +// +build linux + +package fs + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" +) + +type CpusetGroup struct { +} + +func (s *CpusetGroup) Name() string { + return "cpuset" +} + +func (s *CpusetGroup) Apply(d *cgroupData) error { + dir, err := d.path("cpuset") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(dir, d.config, d.pid) +} + +func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpusetCpus != "" { + if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil { + return err + } + } + if cgroup.Resources.CpusetMems != "" { + if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpuset")) +} + +func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} + +func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpuset cgroup mounted. + // Just do nothing and don't fail. + if dir == "" { + return nil + } + mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo") + if err != nil { + return err + } + root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo))) + // 'ensureParent' start with parent because we don't want to + // explicitly inherit from parent, it could conflict with + // 'cpuset.cpu_exclusive'. + if err := s.ensureParent(filepath.Dir(dir), root); err != nil { + return err + } + if err := os.MkdirAll(dir, 0755); err != nil { + return err + } + // We didn't inherit cpuset configs from parent, but we have + // to ensure cpuset configs are set before moving task into the + // cgroup. + // The logic is, if user specified cpuset configs, use these + // specified configs, otherwise, inherit from parent. This makes + // cpuset configs work correctly with 'cpuset.cpu_exclusive', and + // keep backward compatbility. + if err := s.ensureCpusAndMems(dir, cgroup); err != nil { + return err + } + + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := cgroups.WriteCgroupProc(dir, pid); err != nil { + return err + } + + return nil +} + +func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { + return + } + return cpus, mems, nil +} + +// ensureParent makes sure that the parent directory of current is created +// and populated with the proper cpus and mems files copied from +// it's parent. +func (s *CpusetGroup) ensureParent(current, root string) error { + parent := filepath.Dir(current) + if libcontainerUtils.CleanPath(parent) == root { + return nil + } + // Avoid infinite recursion. + if parent == current { + return fmt.Errorf("cpuset: cgroup parent path outside cgroup root") + } + if err := s.ensureParent(parent, root); err != nil { + return err + } + if err := os.MkdirAll(current, 0755); err != nil { + return err + } + return s.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (s *CpusetGroup) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + + if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { + return err + } + if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { + return err + } + + if s.isEmpty(currentCpus) { + if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { + return err + } + } + if s.isEmpty(currentMems) { + if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} + +func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error { + if err := s.Set(path, cgroup); err != nil { + return err + } + return s.copyIfNeeded(path, filepath.Dir(path)) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go new file mode 100644 index 000000000..0ac5b4ed7 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go @@ -0,0 +1,80 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +type DevicesGroup struct { +} + +func (s *DevicesGroup) Name() string { + return "devices" +} + +func (s *DevicesGroup) Apply(d *cgroupData) error { + _, err := d.join("devices") + if err != nil { + // We will return error even it's `not found` error, devices + // cgroup is hard requirement for container's security. + return err + } + return nil +} + +func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { + if system.RunningInUserNS() { + return nil + } + + devices := cgroup.Resources.Devices + if len(devices) > 0 { + for _, dev := range devices { + file := "devices.deny" + if dev.Allow { + file = "devices.allow" + } + if err := writeFile(path, file, dev.CgroupString()); err != nil { + return err + } + } + return nil + } + if cgroup.Resources.AllowAllDevices != nil { + if *cgroup.Resources.AllowAllDevices == false { + if err := writeFile(path, "devices.deny", "a"); err != nil { + return err + } + + for _, dev := range cgroup.Resources.AllowedDevices { + if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { + return err + } + } + return nil + } + + if err := writeFile(path, "devices.allow", "a"); err != nil { + return err + } + } + + for _, dev := range cgroup.Resources.DeniedDevices { + if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *DevicesGroup) Remove(d *cgroupData) error { + return removePath(d.path("devices")) +} + +func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go new file mode 100644 index 000000000..e70dfe3b9 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go @@ -0,0 +1,61 @@ +// +build linux + +package fs + +import ( + "fmt" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type FreezerGroup struct { +} + +func (s *FreezerGroup) Name() string { + return "freezer" +} + +func (s *FreezerGroup) Apply(d *cgroupData) error { + _, err := d.join("freezer") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { + switch cgroup.Resources.Freezer { + case configs.Frozen, configs.Thawed: + if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { + return err + } + + for { + state, err := readFile(path, "freezer.state") + if err != nil { + return err + } + if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { + break + } + time.Sleep(1 * time.Millisecond) + } + case configs.Undefined: + return nil + default: + return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer)) + } + + return nil +} + +func (s *FreezerGroup) Remove(d *cgroupData) error { + return removePath(d.path("freezer")) +} + +func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go new file mode 100644 index 000000000..3ef9e0315 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package fs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go new file mode 100644 index 000000000..2f9727719 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go @@ -0,0 +1,71 @@ +// +build linux + +package fs + +import ( + "fmt" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type HugetlbGroup struct { +} + +func (s *HugetlbGroup) Name() string { + return "hugetlb" +} + +func (s *HugetlbGroup) Apply(d *cgroupData) error { + _, err := d.join("hugetlb") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, hugetlb := range cgroup.Resources.HugetlbLimit { + if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil { + return err + } + } + + return nil +} + +func (s *HugetlbGroup) Remove(d *cgroupData) error { + return removePath(d.path("hugetlb")) +} + +func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { + hugetlbStats := cgroups.HugetlbStats{} + for _, pageSize := range HugePageSizes { + usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".") + value, err := getCgroupParamUint(path, usage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", usage, err) + } + hugetlbStats.Usage = value + + maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".") + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + hugetlbStats.MaxUsage = value + + failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".") + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pageSize] = hugetlbStats + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go new file mode 100644 index 000000000..ad395a5d6 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go @@ -0,0 +1,313 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" // only for Errno + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + + "golang.org/x/sys/unix" +) + +const ( + cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes" + cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes" + cgroupMemoryLimit = "memory.limit_in_bytes" +) + +type MemoryGroup struct { +} + +func (s *MemoryGroup) Name() string { + return "memory" +} + +func (s *MemoryGroup) Apply(d *cgroupData) (err error) { + path, err := d.path("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } else if path == "" { + return nil + } + if memoryAssigned(d.config) { + if _, err := os.Stat(path); os.IsNotExist(err) { + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + // Only enable kernel memory accouting when this cgroup + // is created by libcontainer, otherwise we might get + // error when people use `cgroupsPath` to join an existed + // cgroup whose kernel memory is not initialized. + if err := EnableKernelMemoryAccounting(path); err != nil { + return err + } + } + } + defer func() { + if err != nil { + os.RemoveAll(path) + } + }() + + // We need to join memory cgroup after set memory limits, because + // kmem.limit_in_bytes can only be set when the cgroup is empty. + _, err = d.join("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func EnableKernelMemoryAccounting(path string) error { + // Check if kernel memory is enabled + // We have to limit the kernel memory here as it won't be accounted at all + // until a limit is set on the cgroup and limit cannot be set once the + // cgroup has children, or if there are already tasks in the cgroup. + for _, i := range []int64{1, -1} { + if err := setKernelMemory(path, i); err != nil { + return err + } + } + return nil +} + +func setKernelMemory(path string, kernelMemoryLimit int64) error { + if path == "" { + return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit) + } + if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) { + // kernel memory is not enabled on the system so we should do nothing + return nil + } + if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil { + // Check if the error number returned by the syscall is "EBUSY" + // The EBUSY signal is returned on attempts to write to the + // memory.kmem.limit_in_bytes file if the cgroup has children or + // once tasks have been attached to the cgroup + if pathErr, ok := err.(*os.PathError); ok { + if errNo, ok := pathErr.Err.(syscall.Errno); ok { + if errNo == unix.EBUSY { + return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit) + } + } + } + return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err) + } + return nil +} + +func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error { + // If the memory update is set to -1 we should also + // set swap to -1, it means unlimited memory. + if cgroup.Resources.Memory == -1 { + // Only set swap if it's enabled in kernel + if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) { + cgroup.Resources.MemorySwap = -1 + } + } + + // When memory and swap memory are both set, we need to handle the cases + // for updating container. + if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 { + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + + // When update memory limit, we should adapt the write sequence + // for memory and swap memory, so it won't fail because the new + // value and the old value don't fit kernel's validation. + if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) { + if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } else { + if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + } else { + if cgroup.Resources.Memory != 0 { + if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } + if cgroup.Resources.MemorySwap != 0 { + if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + } + + return nil +} + +func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { + if err := setMemoryAndSwap(path, cgroup); err != nil { + return err + } + + if cgroup.Resources.KernelMemory != 0 { + if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil { + return err + } + } + + if cgroup.Resources.MemoryReservation != 0 { + if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil { + return err + } + } + + if cgroup.Resources.KernelMemoryTCP != 0 { + if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil { + return err + } + } + if cgroup.Resources.OomKillDisable { + if err := writeFile(path, "memory.oom_control", "1"); err != nil { + return err + } + } + if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 { + return nil + } else if *cgroup.Resources.MemorySwappiness <= 100 { + if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil { + return err + } + } else { + return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness) + } + + return nil +} + +func (s *MemoryGroup) Remove(d *cgroupData) error { + return removePath(d.path("memory")) +} + +func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { + // Set stats from memory.stat. + statsFile, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) + } + stats.MemoryStats.Stats[t] = v + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] + + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryData(path, "memsw") + if err != nil { + return err + } + stats.MemoryStats.SwapUsage = swapUsage + kernelUsage, err := getMemoryData(path, "kmem") + if err != nil { + return err + } + stats.MemoryStats.KernelUsage = kernelUsage + kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") + if err != nil { + return err + } + stats.MemoryStats.KernelTCPUsage = kernelTCPUsage + + useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".") + value, err := getCgroupParamUint(path, useHierarchy) + if err != nil { + return err + } + if value == 1 { + stats.MemoryStats.UseHierarchy = true + } + return nil +} + +func memoryAssigned(cgroup *configs.Cgroup) bool { + return cgroup.Resources.Memory != 0 || + cgroup.Resources.MemoryReservation != 0 || + cgroup.Resources.MemorySwap > 0 || + cgroup.Resources.KernelMemory > 0 || + cgroup.Resources.KernelMemoryTCP > 0 || + cgroup.Resources.OomKillDisable || + (cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1) +} + +func getMemoryData(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".") + maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".") + failcnt := strings.Join([]string{moduleName, "failcnt"}, ".") + limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".") + + value, err := getCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) + } + memoryData.Usage = value + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + memoryData.MaxUsage = value + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + memoryData.Failcnt = value + value, err = getCgroupParamUint(path, limit) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err) + } + memoryData.Limit = value + + return memoryData, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go new file mode 100644 index 000000000..d8cf1d87c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go @@ -0,0 +1,40 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NameGroup struct { + GroupName string + Join bool +} + +func (s *NameGroup) Name() string { + return s.GroupName +} + +func (s *NameGroup) Apply(d *cgroupData) error { + if s.Join { + // ignore errors if the named cgroup does not exist + d.join(s.GroupName) + } + return nil +} + +func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *NameGroup) Remove(d *cgroupData) error { + if s.Join { + removePath(d.path(s.GroupName)) + } + return nil +} + +func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go new file mode 100644 index 000000000..8e74b645e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go @@ -0,0 +1,43 @@ +// +build linux + +package fs + +import ( + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetClsGroup struct { +} + +func (s *NetClsGroup) Name() string { + return "net_cls" +} + +func (s *NetClsGroup) Apply(d *cgroupData) error { + _, err := d.join("net_cls") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.NetClsClassid != 0 { + if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil { + return err + } + } + + return nil +} + +func (s *NetClsGroup) Remove(d *cgroupData) error { + return removePath(d.path("net_cls")) +} + +func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go new file mode 100644 index 000000000..d0ab2af89 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go @@ -0,0 +1,41 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetPrioGroup struct { +} + +func (s *NetPrioGroup) Name() string { + return "net_prio" +} + +func (s *NetPrioGroup) Apply(d *cgroupData) error { + _, err := d.join("net_prio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, prioMap := range cgroup.Resources.NetPrioIfpriomap { + if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *NetPrioGroup) Remove(d *cgroupData) error { + return removePath(d.path("net_prio")) +} + +func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go new file mode 100644 index 000000000..5693676d3 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go @@ -0,0 +1,35 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PerfEventGroup struct { +} + +func (s *PerfEventGroup) Name() string { + return "perf_event" +} + +func (s *PerfEventGroup) Apply(d *cgroupData) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *PerfEventGroup) Remove(d *cgroupData) error { + return removePath(d.path("perf_event")) +} + +func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go new file mode 100644 index 000000000..f1e372055 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go @@ -0,0 +1,73 @@ +// +build linux + +package fs + +import ( + "fmt" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PidsGroup struct { +} + +func (s *PidsGroup) Name() string { + return "pids" +} + +func (s *PidsGroup) Apply(d *cgroupData) error { + _, err := d.join("pids") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.PidsLimit != 0 { + // "max" is the fallback value. + limit := "max" + + if cgroup.Resources.PidsLimit > 0 { + limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10) + } + + if err := writeFile(path, "pids.max", limit); err != nil { + return err + } + } + + return nil +} + +func (s *PidsGroup) Remove(d *cgroupData) error { + return removePath(d.path("pids")) +} + +func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error { + current, err := getCgroupParamUint(path, "pids.current") + if err != nil { + return fmt.Errorf("failed to parse pids.current - %s", err) + } + + maxString, err := getCgroupParamString(path, "pids.max") + if err != nil { + return fmt.Errorf("failed to parse pids.max - %s", err) + } + + // Default if pids.max == "max" is 0 -- which represents "no limit". + var max uint64 + if maxString != "max" { + max, err = parseUint(maxString, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max")) + } + } + + stats.PidsStats.Current = current + stats.PidsStats.Limit = max + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go new file mode 100644 index 000000000..5ff0a1615 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go @@ -0,0 +1,78 @@ +// +build linux + +package fs + +import ( + "errors" + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +var ( + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// Saturates negative values at zero and returns a uint64. +// Due to kernel bugs, some of the memory cgroup stats can be negative. +func parseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// Parses a cgroup param and returns as name, value +// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234 +func getCgroupParamKeyValue(t string) (string, uint64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := parseUint(parts[1], 10, 64) + if err != nil { + return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err) + } + + return parts[0], value, nil + default: + return "", 0, ErrNotValidFormat + } +} + +// Gets a single uint64 value from the specified cgroup file. +func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) { + fileName := filepath.Join(cgroupPath, cgroupFile) + contents, err := ioutil.ReadFile(fileName) + if err != nil { + return 0, err + } + + res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) + if err != nil { + return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName) + } + return res, nil +} + +// Gets a string value from the specified cgroup file +func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) { + contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(contents)), nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go new file mode 100644 index 000000000..b1efbfd99 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go @@ -0,0 +1,128 @@ +// +build linux + +package rootless + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/configs/validate" +) + +// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code +// needlessly. We should probably export this list. + +var subsystems = []subsystem{ + &fs.CpusetGroup{}, + &fs.DevicesGroup{}, + &fs.MemoryGroup{}, + &fs.CpuGroup{}, + &fs.CpuacctGroup{}, + &fs.PidsGroup{}, + &fs.BlkioGroup{}, + &fs.HugetlbGroup{}, + &fs.NetClsGroup{}, + &fs.NetPrioGroup{}, + &fs.PerfEventGroup{}, + &fs.FreezerGroup{}, + &fs.NameGroup{GroupName: "name=systemd"}, +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error +} + +// The noop cgroup manager is used for rootless containers, because we currently +// cannot manage cgroups if we are in a rootless setup. This manager is chosen +// by factory if we are in rootless mode. We error out if any cgroup options are +// set in the config -- this may change in the future with upcoming kernel features +// like the cgroup namespace. + +type Manager struct { + Cgroups *configs.Cgroup + Paths map[string]string +} + +func (m *Manager) Apply(pid int) error { + // If there are no cgroup settings, there's nothing to do. + if m.Cgroups == nil { + return nil + } + + // We can't set paths. + // TODO(cyphar): Implement the case where the runner of a rootless container + // owns their own cgroup, which would allow us to set up a + // cgroup for each path. + if m.Cgroups.Paths != nil { + return fmt.Errorf("cannot change cgroup path in rootless container") + } + + // We load the paths into the manager. + paths := make(map[string]string) + for _, sys := range subsystems { + name := sys.Name() + + path, err := cgroups.GetOwnCgroupPath(name) + if err != nil { + // Ignore paths we couldn't resolve. + continue + } + + paths[name] = path + } + + m.Paths = paths + return nil +} + +func (m *Manager) GetPaths() map[string]string { + return m.Paths +} + +func (m *Manager) Set(container *configs.Config) error { + // We have to re-do the validation here, since someone might decide to + // update a rootless container. + return validate.New().Validate(container) +} + +func (m *Manager) GetPids() ([]int, error) { + dir, err := cgroups.GetOwnCgroupPath("devices") + if err != nil { + return nil, err + } + return cgroups.GetPids(dir) +} + +func (m *Manager) GetAllPids() ([]int, error) { + dir, err := cgroups.GetOwnCgroupPath("devices") + if err != nil { + return nil, err + } + return cgroups.GetAllPids(dir) +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + // TODO(cyphar): We can make this work if we figure out a way to allow usage + // of cgroups with a rootless container. While this doesn't + // actually require write access to a cgroup directory, the + // statistics are not useful if they can be affected by + // non-container processes. + return nil, fmt.Errorf("cannot get cgroup stats in rootless container") +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + // TODO(cyphar): We can make this work if we figure out a way to allow usage + // of cgroups with a rootless container. + return fmt.Errorf("cannot use freezer cgroup in rootless container") +} + +func (m *Manager) Destroy() error { + // We don't have to do anything here because we didn't do any setup. + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go new file mode 100644 index 000000000..8eeedc55b --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go @@ -0,0 +1,108 @@ +// +build linux + +package cgroups + +type ThrottlingData struct { + // Number of periods with throttling active + Periods uint64 `json:"periods,omitempty"` + // Number of periods when the container hit its throttling limit. + ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` + // Aggregate time the container was throttled for in nanoseconds. + ThrottledTime uint64 `json:"throttled_time,omitempty"` +} + +// CpuUsage denotes the usage of a CPU. +// All CPU stats are aggregate since container inception. +type CpuUsage struct { + // Total CPU time consumed. + // Units: nanoseconds. + TotalUsage uint64 `json:"total_usage,omitempty"` + // Total CPU time consumed per core. + // Units: nanoseconds. + PercpuUsage []uint64 `json:"percpu_usage,omitempty"` + // Time spent by tasks of the cgroup in kernel mode. + // Units: nanoseconds. + UsageInKernelmode uint64 `json:"usage_in_kernelmode"` + // Time spent by tasks of the cgroup in user mode. + // Units: nanoseconds. + UsageInUsermode uint64 `json:"usage_in_usermode"` +} + +type CpuStats struct { + CpuUsage CpuUsage `json:"cpu_usage,omitempty"` + ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` +} + +type MemoryData struct { + Usage uint64 `json:"usage,omitempty"` + MaxUsage uint64 `json:"max_usage,omitempty"` + Failcnt uint64 `json:"failcnt"` + Limit uint64 `json:"limit"` +} + +type MemoryStats struct { + // memory used for cache + Cache uint64 `json:"cache,omitempty"` + // usage of memory + Usage MemoryData `json:"usage,omitempty"` + // usage of memory + swap + SwapUsage MemoryData `json:"swap_usage,omitempty"` + // usage of kernel memory + KernelUsage MemoryData `json:"kernel_usage,omitempty"` + // usage of kernel TCP memory + KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` + // if true, memory usage is accounted for throughout a hierarchy of cgroups. + UseHierarchy bool `json:"use_hierarchy"` + + Stats map[string]uint64 `json:"stats,omitempty"` +} + +type PidsStats struct { + // number of pids in the cgroup + Current uint64 `json:"current,omitempty"` + // active pids hard limit + Limit uint64 `json:"limit,omitempty"` +} + +type BlkioStatEntry struct { + Major uint64 `json:"major,omitempty"` + Minor uint64 `json:"minor,omitempty"` + Op string `json:"op,omitempty"` + Value uint64 `json:"value,omitempty"` +} + +type BlkioStats struct { + // number of bytes tranferred to and from the block device + IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` + IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` + IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` + IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` + IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` + IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` + IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` + SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` +} + +type HugetlbStats struct { + // current res_counter usage for hugetlb + Usage uint64 `json:"usage,omitempty"` + // maximum usage ever recorded. + MaxUsage uint64 `json:"max_usage,omitempty"` + // number of times hugetlb usage allocation failure. + Failcnt uint64 `json:"failcnt"` +} + +type Stats struct { + CpuStats CpuStats `json:"cpu_stats,omitempty"` + MemoryStats MemoryStats `json:"memory_stats,omitempty"` + PidsStats PidsStats `json:"pids_stats,omitempty"` + BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + // the map is in the format "size of hugepage: stats of the hugepage" + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` +} + +func NewStats() *Stats { + memoryStats := MemoryStats{Stats: make(map[string]uint64)} + hugetlbStats := make(map[string]HugetlbStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go new file mode 100644 index 000000000..7de9ae605 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go @@ -0,0 +1,55 @@ +// +build !linux + +package systemd + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager struct { + Cgroups *configs.Cgroup + Paths map[string]string +} + +func UseSystemd() bool { + return false +} + +func (m *Manager) Apply(pid int) error { + return fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPids() ([]int, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetAllPids() ([]int, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Destroy() error { + return fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPaths() map[string]string { + return nil +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Set(container *configs.Config) error { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + return fmt.Errorf("Systemd not supported") +} + +func Freeze(c *configs.Cgroup, state configs.FreezerState) error { + return fmt.Errorf("Systemd not supported") +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go new file mode 100644 index 000000000..b010b4b32 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go @@ -0,0 +1,553 @@ +// +build linux + +package systemd + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "time" + + systemdDbus "github.com/coreos/go-systemd/dbus" + systemdUtil "github.com/coreos/go-systemd/util" + "github.com/godbus/dbus" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Set the cgroup represented by cgroup. + Set(path string, cgroup *configs.Cgroup) error +} + +var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") + +type subsystemSet []subsystem + +func (s subsystemSet) Get(name string) (subsystem, error) { + for _, ss := range s { + if ss.Name() == name { + return ss, nil + } + } + return nil, errSubsystemDoesNotExist +} + +var subsystems = subsystemSet{ + &fs.CpusetGroup{}, + &fs.DevicesGroup{}, + &fs.MemoryGroup{}, + &fs.CpuGroup{}, + &fs.CpuacctGroup{}, + &fs.PidsGroup{}, + &fs.BlkioGroup{}, + &fs.HugetlbGroup{}, + &fs.PerfEventGroup{}, + &fs.FreezerGroup{}, + &fs.NetPrioGroup{}, + &fs.NetClsGroup{}, + &fs.NameGroup{GroupName: "name=systemd"}, +} + +const ( + testScopeWait = 4 + testSliceWait = 4 +) + +var ( + connLock sync.Mutex + theConn *systemdDbus.Conn + hasStartTransientUnit bool + hasStartTransientSliceUnit bool + hasTransientDefaultDependencies bool + hasDelegate bool +) + +func newProp(name string, units interface{}) systemdDbus.Property { + return systemdDbus.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +func UseSystemd() bool { + if !systemdUtil.IsRunningSystemd() { + return false + } + + connLock.Lock() + defer connLock.Unlock() + + if theConn == nil { + var err error + theConn, err = systemdDbus.New() + if err != nil { + return false + } + + // Assume we have StartTransientUnit + hasStartTransientUnit = true + + // But if we get UnknownMethod error we don't + if _, err := theConn.StartTransientUnit("test.scope", "invalid", nil, nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" { + hasStartTransientUnit = false + return hasStartTransientUnit + } + } + } + + // Ensure the scope name we use doesn't exist. Use the Pid to + // avoid collisions between multiple libcontainer users on a + // single host. + scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid()) + testScopeExists := true + for i := 0; i <= testScopeWait; i++ { + if _, err := theConn.StopUnit(scope, "replace", nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") { + testScopeExists = false + break + } + } + } + time.Sleep(time.Millisecond) + } + + // Bail out if we can't kill this scope without testing for DefaultDependencies + if testScopeExists { + return hasStartTransientUnit + } + + // Assume StartTransientUnit on a scope allows DefaultDependencies + hasTransientDefaultDependencies = true + ddf := newProp("DefaultDependencies", false) + if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{ddf}, nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") { + hasTransientDefaultDependencies = false + } + } + } + + // Not critical because of the stop unit logic above. + theConn.StopUnit(scope, "replace", nil) + + // Assume StartTransientUnit on a scope allows Delegate + hasDelegate = true + dl := newProp("Delegate", true) + if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") { + hasDelegate = false + } + } + } + + // Assume we have the ability to start a transient unit as a slice + // This was broken until systemd v229, but has been back-ported on RHEL environments >= 219 + // For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299 + hasStartTransientSliceUnit = true + + // To ensure simple clean-up, we create a slice off the root with no hierarchy + slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid()) + if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil { + if _, ok := err.(dbus.Error); ok { + hasStartTransientSliceUnit = false + } + } + + for i := 0; i <= testSliceWait; i++ { + if _, err := theConn.StopUnit(slice, "replace", nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") { + hasStartTransientSliceUnit = false + break + } + } + } else { + break + } + time.Sleep(time.Millisecond) + } + + // Not critical because of the stop unit logic above. + theConn.StopUnit(scope, "replace", nil) + theConn.StopUnit(slice, "replace", nil) + } + return hasStartTransientUnit +} + +func (m *Manager) Apply(pid int) error { + var ( + c = m.Cgroups + unitName = getUnitName(c) + slice = "system.slice" + properties []systemdDbus.Property + ) + + if c.Paths != nil { + paths := make(map[string]string) + for name, path := range c.Paths { + _, err := getSubsystemPath(m.Cgroups, name) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[name] = path + } + m.Paths = paths + return cgroups.EnterPid(m.Paths, pid) + } + + if c.Parent != "" { + slice = c.Parent + } + + properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) + + // if we create a slice, the parent is defined via a Wants= + if strings.HasSuffix(unitName, ".slice") { + // This was broken until systemd v229, but has been back-ported on RHEL environments >= 219 + if !hasStartTransientSliceUnit { + return fmt.Errorf("systemd version does not support ability to start a slice as transient unit") + } + properties = append(properties, systemdDbus.PropWants(slice)) + } else { + // otherwise, we use Slice= + properties = append(properties, systemdDbus.PropSlice(slice)) + } + + // only add pid if its valid, -1 is used w/ general slice creation. + if pid != -1 { + properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) + } + + if hasDelegate { + // This is only supported on systemd versions 218 and above. + properties = append(properties, newProp("Delegate", true)) + } + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. + properties = append(properties, + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("BlockIOAccounting", true)) + + if hasTransientDefaultDependencies { + properties = append(properties, + newProp("DefaultDependencies", false)) + } + + if c.Resources.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", c.Resources.Memory)) + } + + if c.Resources.CpuShares != 0 { + properties = append(properties, + newProp("CPUShares", c.Resources.CpuShares)) + } + + // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. + if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 { + cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod + properties = append(properties, + newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) + } + + if c.Resources.BlkioWeight != 0 { + properties = append(properties, + newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight))) + } + + // We have to set kernel memory here, as we can't change it once + // processes have been attached to the cgroup. + if c.Resources.KernelMemory != 0 { + if err := setKernelMemory(c); err != nil { + return err + } + } + + if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil && !isUnitExists(err) { + return err + } + + if err := joinCgroups(c, pid); err != nil { + return err + } + + paths := make(map[string]string) + for _, s := range subsystems { + subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name()) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[s.Name()] = subsystemPath + } + m.Paths = paths + return nil +} + +func (m *Manager) Destroy() error { + if m.Cgroups.Paths != nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil) + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { + path, err := getSubsystemPath(c, subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + if err := cgroups.WriteCgroupProc(path, pid); err != nil { + return "", err + } + return path, nil +} + +func joinCgroups(c *configs.Cgroup, pid int) error { + for _, sys := range subsystems { + name := sys.Name() + switch name { + case "name=systemd": + // let systemd handle this + case "cpuset": + path, err := getSubsystemPath(c, name) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + s := &fs.CpusetGroup{} + if err := s.ApplyDir(path, c, pid); err != nil { + return err + } + default: + _, err := join(c, name, pid) + if err != nil { + // Even if it's `not found` error, we'll return err + // because devices cgroup is hard requirement for + // container security. + if name == "devices" { + return err + } + // For other subsystems, omit the `not found` error + // because they are optional. + if !cgroups.IsNotFound(err) { + return err + } + } + } + } + + return nil +} + +// systemd represents slice hierarchy using `-`, so we need to follow suit when +// generating the path of slice. Essentially, test-a-b.slice becomes +// test.slice/test-a.slice/test-a-b.slice. +func ExpandSlice(slice string) (string, error) { + suffix := ".slice" + // Name has to end with ".slice", but can't be just ".slice". + if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + // Path-separators are not allowed. + if strings.Contains(slice, "/") { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + var path, prefix string + sliceName := strings.TrimSuffix(slice, suffix) + // if input was -.slice, we should just return root now + if sliceName == "-" { + return "/", nil + } + for _, component := range strings.Split(sliceName, "-") { + // test--a.slice isn't permitted, nor is -test.slice. + if component == "" { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + // Append the component to the path and to the prefix. + path += prefix + component + suffix + "/" + prefix += component + "-" + } + + return path, nil +} + +func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { + mountpoint, err := cgroups.FindCgroupMountpoint(subsystem) + if err != nil { + return "", err + } + + initPath, err := cgroups.GetInitCgroup(subsystem) + if err != nil { + return "", err + } + // if pid 1 is systemd 226 or later, it will be in init.scope, not the root + initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope") + + slice := "system.slice" + if c.Parent != "" { + slice = c.Parent + } + + slice, err = ExpandSlice(slice) + if err != nil { + return "", err + } + + return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + path, err := getSubsystemPath(m.Cgroups, "freezer") + if err != nil { + return err + } + prevState := m.Cgroups.Resources.Freezer + m.Cgroups.Resources.Freezer = state + freezer, err := subsystems.Get("freezer") + if err != nil { + return err + } + err = freezer.Set(path, m.Cgroups) + if err != nil { + m.Cgroups.Resources.Freezer = prevState + return err + } + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + path, err := getSubsystemPath(m.Cgroups, "devices") + if err != nil { + return nil, err + } + return cgroups.GetPids(path) +} + +func (m *Manager) GetAllPids() ([]int, error) { + path, err := getSubsystemPath(m.Cgroups, "devices") + if err != nil { + return nil, err + } + return cgroups.GetAllPids(path) +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, err := subsystems.Get(name) + if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + + return stats, nil +} + +func (m *Manager) Set(container *configs.Config) error { + // If Paths are set, then we are just joining cgroups paths + // and there is no need to set any values. + if m.Cgroups.Paths != nil { + return nil + } + for _, sys := range subsystems { + // Get the subsystem path, but don't error out for not found cgroups. + path, err := getSubsystemPath(container.Cgroups, sys.Name()) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + if m.Paths["cpu"] != "" { + if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { + return err + } + } + return nil +} + +func getUnitName(c *configs.Cgroup) string { + // by default, we create a scope unless the user explicitly asks for a slice. + if !strings.HasSuffix(c.Name, ".slice") { + return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name) + } + return c.Name +} + +func setKernelMemory(c *configs.Cgroup) error { + path, err := getSubsystemPath(c, "memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + return fs.EnableKernelMemoryAccounting(path) +} + +// isUnitExists returns true if the error is that a systemd unit already exists. +func isUnitExists(err error) bool { + if err != nil { + if dbusError, ok := err.(dbus.Error); ok { + return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") + } + } + return false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go new file mode 100644 index 000000000..7c995efee --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -0,0 +1,462 @@ +// +build linux + +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/docker/go-units" +) + +const ( + cgroupNamePrefix = "name=" + CgroupProcesses = "cgroup.procs" +) + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt +func FindCgroupMountpoint(subsystem string) (string, error) { + mnt, _, err := FindCgroupMountpointAndRoot(subsystem) + return mnt, err +} + +func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) { + // We are not using mount.GetMounts() because it's super-inefficient, + // parsing it directly sped up x10 times because of not using Sscanf. + // It was one of two major performance drawbacks in container start. + if !isSubsystemAvailable(subsystem) { + return "", "", NewNotFoundError(subsystem) + } + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Split(txt, " ") + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], fields[3], nil + } + } + } + if err := scanner.Err(); err != nil { + return "", "", err + } + + return "", "", NewNotFoundError(subsystem) +} + +func isSubsystemAvailable(subsystem string) bool { + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return false + } + _, avail := cgroups[subsystem] + return avail +} + +func GetClosestMountpointAncestor(dir, mountinfo string) string { + deepestMountPoint := "" + for _, mountInfoEntry := range strings.Split(mountinfo, "\n") { + mountInfoParts := strings.Fields(mountInfoEntry) + if len(mountInfoParts) < 5 { + continue + } + mountPoint := mountInfoParts[4] + if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) { + deepestMountPoint = mountPoint + } + } + return deepestMountPoint +} + +func FindCgroupMountpointDir() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + text := scanner.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for "cgroup" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "cgroup" { + // Check that the mount is properly formated. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + return filepath.Dir(fields[4]), nil + } + } + if err := scanner.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError("cgroup") +} + +type Mount struct { + Mountpoint string + Root string + Subsystems []string +} + +func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { + if len(m.Subsystems) == 0 { + return "", fmt.Errorf("no subsystem for mount") + } + + return getControllerPath(m.Subsystems[0], cgroups) +} + +func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) { + res := make([]Mount, 0, len(ss)) + scanner := bufio.NewScanner(mi) + numFound := 0 + for scanner.Scan() && numFound < len(ss) { + txt := scanner.Text() + sepIdx := strings.Index(txt, " - ") + if sepIdx == -1 { + return nil, fmt.Errorf("invalid mountinfo format") + } + if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" { + continue + } + fields := strings.Split(txt, " ") + m := Mount{ + Mountpoint: fields[4], + Root: fields[3], + } + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if !ss[opt] { + continue + } + if strings.HasPrefix(opt, cgroupNamePrefix) { + m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):]) + } else { + m.Subsystems = append(m.Subsystems, opt) + } + if !all { + numFound++ + } + } + res = append(res, m) + } + if err := scanner.Err(); err != nil { + return nil, err + } + return res, nil +} + +// GetCgroupMounts returns the mounts for the cgroup subsystems. +// all indicates whether to return just the first instance or all the mounts. +func GetCgroupMounts(all bool) ([]Mount, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer f.Close() + + allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + allMap := make(map[string]bool) + for s := range allSubsystems { + allMap[s] = true + } + return getCgroupMountsHelper(allMap, f, all) +} + +// GetAllSubsystems returns all the cgroup subsystems supported by the kernel +func GetAllSubsystems() ([]string, error) { + f, err := os.Open("/proc/cgroups") + if err != nil { + return nil, err + } + defer f.Close() + + subsystems := []string{} + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + if text[0] != '#' { + parts := strings.Fields(text) + if len(parts) >= 4 && parts[3] != "0" { + subsystems = append(subsystems, parts[0]) + } + } + } + if err := s.Err(); err != nil { + return nil, err + } + return subsystems, nil +} + +// GetOwnCgroup returns the relative path to the cgroup docker is running in. +func GetOwnCgroup(subsystem string) (string, error) { + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetOwnCgroupPath(subsystem string) (string, error) { + cgroup, err := GetOwnCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func GetInitCgroup(subsystem string) (string, error) { + cgroups, err := ParseCgroupFile("/proc/1/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetInitCgroupPath(subsystem string) (string, error) { + cgroup, err := GetInitCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func getCgroupPathHelper(subsystem, cgroup string) (string, error) { + mnt, root, err := FindCgroupMountpointAndRoot(subsystem) + if err != nil { + return "", err + } + + // This is needed for nested containers, because in /proc/self/cgroup we + // see pathes from host, which don't exist in container. + relCgroup, err := filepath.Rel(root, cgroup) + if err != nil { + return "", err + } + + return filepath.Join(mnt, relCgroup), nil +} + +func readProcsFile(dir string) ([]int, error) { + f, err := os.Open(filepath.Join(dir, CgroupProcesses)) + if err != nil { + return nil, err + } + defer f.Close() + + var ( + s = bufio.NewScanner(f) + out = []int{} + ) + + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + return out, nil +} + +// ParseCgroupFile parses the given cgroup file, typically from +// /proc/<pid>/cgroup, into a map of subgroups to cgroup names. +func ParseCgroupFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + return parseCgroupFromReader(f) +} + +// helper function for ParseCgroupFile to make testing easier +func parseCgroupFromReader(r io.Reader) (map[string]string, error) { + s := bufio.NewScanner(r) + cgroups := make(map[string]string) + + for s.Scan() { + text := s.Text() + // from cgroups(7): + // /proc/[pid]/cgroup + // ... + // For each cgroup hierarchy ... there is one entry + // containing three colon-separated fields of the form: + // hierarchy-ID:subsystem-list:cgroup-path + parts := strings.SplitN(text, ":", 3) + if len(parts) < 3 { + return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text) + } + + for _, subs := range strings.Split(parts[1], ",") { + cgroups[subs] = parts[2] + } + } + if err := s.Err(); err != nil { + return nil, err + } + + return cgroups, nil +} + +func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { + + if p, ok := cgroups[subsystem]; ok { + return p, nil + } + + if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok { + return p, nil + } + + return "", NewNotFoundError(subsystem) +} + +func PathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + return false + } + return true +} + +func EnterPid(cgroupPaths map[string]string, pid int) error { + for _, path := range cgroupPaths { + if PathExists(path) { + if err := WriteCgroupProc(path, pid); err != nil { + return err + } + } + } + return nil +} + +// RemovePaths iterates over the provided paths removing them. +// We trying to remove all paths five times with increasing delay between tries. +// If after all there are not removed cgroups - appropriate error will be +// returned. +func RemovePaths(paths map[string]string) (err error) { + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + for s, p := range paths { + os.RemoveAll(p) + // TODO: here probably should be logging + _, err := os.Stat(p) + // We need this strange way of checking cgroups existence because + // RemoveAll almost always returns error, even on already removed + // cgroups + if os.IsNotExist(err) { + delete(paths, s) + } + } + if len(paths) == 0 { + return nil + } + } + return fmt.Errorf("Failed to remove paths: %v", paths) +} + +func GetHugePageSize() ([]string, error) { + var pageSizes []string + sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"} + files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + if err != nil { + return pageSizes, err + } + for _, st := range files { + nameArray := strings.Split(st.Name(), "-") + pageSize, err := units.RAMInBytes(nameArray[1]) + if err != nil { + return []string{}, err + } + sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList) + pageSizes = append(pageSizes, sizeString) + } + + return pageSizes, nil +} + +// GetPids returns all pids, that were added to cgroup at path. +func GetPids(path string) ([]int, error) { + return readProcsFile(path) +} + +// GetAllPids returns all pids, that were added to cgroup at path and to all its +// subcgroups. +func GetAllPids(path string) ([]int, error) { + var pids []int + // collect pids from all sub-cgroups + err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error { + dir, file := filepath.Split(p) + if file != CgroupProcesses { + return nil + } + if iErr != nil { + return iErr + } + cPids, err := readProcsFile(dir) + if err != nil { + return err + } + pids = append(pids, cPids...) + return nil + }) + return pids, err +} + +// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file +func WriteCgroupProc(dir string, pid int) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. + if dir == "" { + return fmt.Errorf("no such directory for %s", CgroupProcesses) + } + + // Dont attach any pid to the cgroup if -1 is specified as a pid + if pid != -1 { + if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go new file mode 100644 index 000000000..c7bdf1f60 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go @@ -0,0 +1,10 @@ +// +build linux,!go1.5 + +package libcontainer + +import "syscall" + +// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building +// with earlier versions +func enableSetgroups(sys *syscall.SysProcAttr) { +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go new file mode 100644 index 000000000..e0f3ca165 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go @@ -0,0 +1,61 @@ +package configs + +import "fmt" + +// blockIODevice holds major:minor format supported in blkio cgroup +type blockIODevice struct { + // Major is the device's major number + Major int64 `json:"major"` + // Minor is the device's minor number + Minor int64 `json:"minor"` +} + +// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair +type WeightDevice struct { + blockIODevice + // Weight is the bandwidth rate for the device, range is from 10 to 1000 + Weight uint16 `json:"weight"` + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + LeafWeight uint16 `json:"leafWeight"` +} + +// NewWeightDevice returns a configured WeightDevice pointer +func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice { + wd := &WeightDevice{} + wd.Major = major + wd.Minor = minor + wd.Weight = weight + wd.LeafWeight = leafWeight + return wd +} + +// WeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) WeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) +} + +// LeafWeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) LeafWeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) +} + +// ThrottleDevice struct holds a `major:minor rate_per_second` pair +type ThrottleDevice struct { + blockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// NewThrottleDevice returns a configured ThrottleDevice pointer +func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { + td := &ThrottleDevice{} + td.Major = major + td.Minor = minor + td.Rate = rate + return td +} + +// String formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) String() string { + return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go new file mode 100644 index 000000000..e15a662f5 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go @@ -0,0 +1,122 @@ +package configs + +type FreezerState string + +const ( + Undefined FreezerState = "" + Frozen FreezerState = "FROZEN" + Thawed FreezerState = "THAWED" +) + +type Cgroup struct { + // Deprecated, use Path instead + Name string `json:"name,omitempty"` + + // name of parent of cgroup or slice + // Deprecated, use Path instead + Parent string `json:"parent,omitempty"` + + // Path specifies the path to cgroups that are created and/or joined by the container. + // The path is assumed to be relative to the host system cgroup mountpoint. + Path string `json:"path"` + + // ScopePrefix describes prefix for the scope name + ScopePrefix string `json:"scope_prefix"` + + // Paths represent the absolute cgroups paths to join. + // This takes precedence over Path. + Paths map[string]string + + // Resources contains various cgroups settings to apply + *Resources +} + +type Resources struct { + // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. + // Deprecated + AllowAllDevices *bool `json:"allow_all_devices,omitempty"` + // Deprecated + AllowedDevices []*Device `json:"allowed_devices,omitempty"` + // Deprecated + DeniedDevices []*Device `json:"denied_devices,omitempty"` + + Devices []*Device `json:"devices"` + + // Memory limit (in bytes) + Memory int64 `json:"memory"` + + // Memory reservation or soft_limit (in bytes) + MemoryReservation int64 `json:"memory_reservation"` + + // Total memory usage (memory + swap); set `-1` to enable unlimited swap + MemorySwap int64 `json:"memory_swap"` + + // Kernel memory limit (in bytes) + KernelMemory int64 `json:"kernel_memory"` + + // Kernel memory limit for TCP use (in bytes) + KernelMemoryTCP int64 `json:"kernel_memory_tcp"` + + // CPU shares (relative weight vs. other containers) + CpuShares uint64 `json:"cpu_shares"` + + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + CpuQuota int64 `json:"cpu_quota"` + + // CPU period to be used for hardcapping (in usecs). 0 to use system default. + CpuPeriod uint64 `json:"cpu_period"` + + // How many time CPU will use in realtime scheduling (in usecs). + CpuRtRuntime int64 `json:"cpu_rt_quota"` + + // CPU period to be used for realtime scheduling (in usecs). + CpuRtPeriod uint64 `json:"cpu_rt_period"` + + // CPU to use + CpusetCpus string `json:"cpuset_cpus"` + + // MEM to use + CpusetMems string `json:"cpuset_mems"` + + // Process limit; set <= `0' to disable limit. + PidsLimit int64 `json:"pids_limit"` + + // Specifies per cgroup weight, range is from 10 to 1000. + BlkioWeight uint16 `json:"blkio_weight"` + + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` + + // Weight per cgroup per device, can override BlkioWeight. + BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` + + // IO read rate limit per cgroup per device, bytes per second. + BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` + + // IO write rate limit per cgroup per device, bytes per second. + BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` + + // IO read rate limit per cgroup per device, IO per second. + BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` + + // IO write rate limit per cgroup per device, IO per second. + BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` + + // set the freeze value for the process + Freezer FreezerState `json:"freezer"` + + // Hugetlb limit (in bytes) + HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` + + // Whether to disable OOM Killer + OomKillDisable bool `json:"oom_kill_disable"` + + // Tuning swappiness behaviour per cgroup + MemorySwappiness *uint64 `json:"memory_swappiness"` + + // Set priority of network traffic for container + NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` + + // Set class identifier for container's network packets + NetClsClassid uint32 `json:"net_cls_classid_u"` +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go new file mode 100644 index 000000000..95e2830a4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go @@ -0,0 +1,6 @@ +// +build !windows,!linux,!freebsd + +package configs + +type Cgroup struct { +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go new file mode 100644 index 000000000..d74847b0d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_windows.go @@ -0,0 +1,6 @@ +package configs + +// TODO Windows: This can ultimately be entirely factored out on Windows as +// cgroups are a Unix-specific construct. +type Cgroup struct { +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go new file mode 100644 index 000000000..269fffff3 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -0,0 +1,344 @@ +package configs + +import ( + "bytes" + "encoding/json" + "fmt" + "os/exec" + "time" + + "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/sirupsen/logrus" +) + +type Rlimit struct { + Type int `json:"type"` + Hard uint64 `json:"hard"` + Soft uint64 `json:"soft"` +} + +// IDMap represents UID/GID Mappings for User Namespaces. +type IDMap struct { + ContainerID int `json:"container_id"` + HostID int `json:"host_id"` + Size int `json:"size"` +} + +// Seccomp represents syscall restrictions +// By default, only the native architecture of the kernel is allowed to be used +// for syscalls. Additional architectures can be added by specifying them in +// Architectures. +type Seccomp struct { + DefaultAction Action `json:"default_action"` + Architectures []string `json:"architectures"` + Syscalls []*Syscall `json:"syscalls"` +} + +// Action is taken upon rule match in Seccomp +type Action int + +const ( + Kill Action = iota + 1 + Errno + Trap + Allow + Trace +) + +// Operator is a comparison operator to be used when matching syscall arguments in Seccomp +type Operator int + +const ( + EqualTo Operator = iota + 1 + NotEqualTo + GreaterThan + GreaterThanOrEqualTo + LessThan + LessThanOrEqualTo + MaskEqualTo +) + +// Arg is a rule to match a specific syscall argument in Seccomp +type Arg struct { + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"value_two"` + Op Operator `json:"op"` +} + +// Syscall is a rule to match a syscall in Seccomp +type Syscall struct { + Name string `json:"name"` + Action Action `json:"action"` + Args []*Arg `json:"args"` +} + +// TODO Windows. Many of these fields should be factored out into those parts +// which are common across platforms, and those which are platform specific. + +// Config defines configuration options for executing a process inside a contained environment. +type Config struct { + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root"` + + // ParentDeathSignal specifies the signal that is sent to the container's process in the case + // that the parent process dies. + ParentDeathSignal int `json:"parent_death_signal"` + + // Path to a directory containing the container's root filesystem. + Rootfs string `json:"rootfs"` + + // Readonlyfs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writtable. + Readonlyfs bool `json:"readonlyfs"` + + // Specifies the mount propagation flags to be applied to /. + RootPropagation int `json:"rootPropagation"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts []*Mount `json:"mounts"` + + // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! + Devices []*Device `json:"devices"` + + MountLabel string `json:"mount_label"` + + // Hostname optionally sets the container's hostname if provided + Hostname string `json:"hostname"` + + // Namespaces specifies the container's namespaces that it should setup when cloning the init process + // If a namespace is not provided that namespace is shared from the container's parent process + Namespaces Namespaces `json:"namespaces"` + + // Capabilities specify the capabilities to keep when executing the process inside the container + // All capabilities not specified will be dropped from the processes capability mask + Capabilities *Capabilities `json:"capabilities"` + + // Networks specifies the container's network setup to be created + Networks []*Network `json:"networks"` + + // Routes can be specified to create entries in the route table as the container is started + Routes []*Route `json:"routes"` + + // Cgroups specifies specific cgroup settings for the various subsystems that the container is + // placed into to limit the resources the container has available + Cgroups *Cgroup `json:"cgroups"` + + // AppArmorProfile specifies the profile to apply to the process running in the container and is + // change at the time the process is execed + AppArmorProfile string `json:"apparmor_profile,omitempty"` + + // ProcessLabel specifies the label to apply to the process running in the container. It is + // commonly used by selinux + ProcessLabel string `json:"process_label,omitempty"` + + // Rlimits specifies the resource limits, such as max open files, to set in the container + // If Rlimits are not set, the container will inherit rlimits from the parent process + Rlimits []Rlimit `json:"rlimits,omitempty"` + + // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores + // for a process. Valid values are between the range [-1000, '1000'], where processes with + // higher scores are preferred for being killed. + // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ + OomScoreAdj int `json:"oom_score_adj"` + + // UidMappings is an array of User ID mappings for User Namespaces + UidMappings []IDMap `json:"uid_mappings"` + + // GidMappings is an array of Group ID mappings for User Namespaces + GidMappings []IDMap `json:"gid_mappings"` + + // MaskPaths specifies paths within the container's rootfs to mask over with a bind + // mount pointing to /dev/null as to prevent reads of the file. + MaskPaths []string `json:"mask_paths"` + + // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only + // so that these files prevent any writes. + ReadonlyPaths []string `json:"readonly_paths"` + + // Sysctl is a map of properties and their values. It is the equivalent of using + // sysctl -w my.property.name value in Linux. + Sysctl map[string]string `json:"sysctl"` + + // Seccomp allows actions to be taken whenever a syscall is made within the container. + // A number of rules are given, each having an action to be taken if a syscall matches it. + // A default action to be taken if no rules match is also given. + Seccomp *Seccomp `json:"seccomp"` + + // NoNewPrivileges controls whether processes in the container can gain additional privileges. + NoNewPrivileges bool `json:"no_new_privileges,omitempty"` + + // Hooks are a collection of actions to perform at various container lifecycle events. + // CommandHooks are serialized to JSON, but other hooks are not. + Hooks *Hooks + + // Version is the version of opencontainer specification that is supported. + Version string `json:"version"` + + // Labels are user defined metadata that is stored in the config and populated on the state + Labels []string `json:"labels"` + + // NoNewKeyring will not allocated a new session keyring for the container. It will use the + // callers keyring in this case. + NoNewKeyring bool `json:"no_new_keyring"` + + // Rootless specifies whether the container is a rootless container. + Rootless bool `json:"rootless"` +} + +type Hooks struct { + // Prestart commands are executed after the container namespaces are created, + // but before the user supplied command is executed from init. + Prestart []Hook + + // Poststart commands are executed after the container init process starts. + Poststart []Hook + + // Poststop commands are executed after the container init process exits. + Poststop []Hook +} + +type Capabilities struct { + // Bounding is the set of capabilities checked by the kernel. + Bounding []string + // Effective is the set of capabilities checked by the kernel. + Effective []string + // Inheritable is the capabilities preserved across execve. + Inheritable []string + // Permitted is the limiting superset for effective capabilities. + Permitted []string + // Ambient is the ambient set of capabilities that are kept. + Ambient []string +} + +func (hooks *Hooks) UnmarshalJSON(b []byte) error { + var state struct { + Prestart []CommandHook + Poststart []CommandHook + Poststop []CommandHook + } + + if err := json.Unmarshal(b, &state); err != nil { + return err + } + + deserialize := func(shooks []CommandHook) (hooks []Hook) { + for _, shook := range shooks { + hooks = append(hooks, shook) + } + + return hooks + } + + hooks.Prestart = deserialize(state.Prestart) + hooks.Poststart = deserialize(state.Poststart) + hooks.Poststop = deserialize(state.Poststop) + return nil +} + +func (hooks Hooks) MarshalJSON() ([]byte, error) { + serialize := func(hooks []Hook) (serializableHooks []CommandHook) { + for _, hook := range hooks { + switch chook := hook.(type) { + case CommandHook: + serializableHooks = append(serializableHooks, chook) + default: + logrus.Warnf("cannot serialize hook of type %T, skipping", hook) + } + } + + return serializableHooks + } + + return json.Marshal(map[string]interface{}{ + "prestart": serialize(hooks.Prestart), + "poststart": serialize(hooks.Poststart), + "poststop": serialize(hooks.Poststop), + }) +} + +// HookState is the payload provided to a hook on execution. +type HookState specs.State + +type Hook interface { + // Run executes the hook with the provided state. + Run(HookState) error +} + +// NewFunctionHook will call the provided function when the hook is run. +func NewFunctionHook(f func(HookState) error) FuncHook { + return FuncHook{ + run: f, + } +} + +type FuncHook struct { + run func(HookState) error +} + +func (f FuncHook) Run(s HookState) error { + return f.run(s) +} + +type Command struct { + Path string `json:"path"` + Args []string `json:"args"` + Env []string `json:"env"` + Dir string `json:"dir"` + Timeout *time.Duration `json:"timeout"` +} + +// NewCommandHook will execute the provided command when the hook is run. +func NewCommandHook(cmd Command) CommandHook { + return CommandHook{ + Command: cmd, + } +} + +type CommandHook struct { + Command +} + +func (c Command) Run(s HookState) error { + b, err := json.Marshal(s) + if err != nil { + return err + } + var stdout, stderr bytes.Buffer + cmd := exec.Cmd{ + Path: c.Path, + Args: c.Args, + Env: c.Env, + Stdin: bytes.NewReader(b), + Stdout: &stdout, + Stderr: &stderr, + } + if err := cmd.Start(); err != nil { + return err + } + errC := make(chan error, 1) + go func() { + err := cmd.Wait() + if err != nil { + err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) + } + errC <- err + }() + var timerCh <-chan time.Time + if c.Timeout != nil { + timer := time.NewTimer(*c.Timeout) + defer timer.Stop() + timerCh = timer.C + } + select { + case err := <-errC: + return err + case <-timerCh: + cmd.Process.Kill() + cmd.Wait() + return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds()) + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go new file mode 100644 index 000000000..07da10804 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go @@ -0,0 +1,61 @@ +package configs + +import "fmt" + +// HostUID gets the translated uid for the process on host which could be +// different when user namespaces are enabled. +func (c Config) HostUID(containerId int) (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.UidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.") + } + id, found := c.hostIDFromMapping(containerId, c.UidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.") + } + return id, nil + } + // Return unchanged id. + return containerId, nil +} + +// HostRootUID gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c Config) HostRootUID() (int, error) { + return c.HostUID(0) +} + +// HostGID gets the translated gid for the process on host which could be +// different when user namespaces are enabled. +func (c Config) HostGID(containerId int) (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.GidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + } + id, found := c.hostIDFromMapping(containerId, c.GidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.") + } + return id, nil + } + // Return unchanged id. + return containerId, nil +} + +// HostRootGID gets the root gid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c Config) HostRootGID() (int, error) { + return c.HostGID(0) +} + +// Utility function that gets a host ID for a container ID from user namespace map +// if that ID is present in the map. +func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go new file mode 100644 index 000000000..8701bb212 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go @@ -0,0 +1,57 @@ +package configs + +import ( + "fmt" + "os" +) + +const ( + Wildcard = -1 +) + +// TODO Windows: This can be factored out in the future + +type Device struct { + // Device type, block, char, etc. + Type rune `json:"type"` + + // Path to the device. + Path string `json:"path"` + + // Major is the device's major number. + Major int64 `json:"major"` + + // Minor is the device's minor number. + Minor int64 `json:"minor"` + + // Cgroup permissions format, rwm. + Permissions string `json:"permissions"` + + // FileMode permission bits for the device. + FileMode os.FileMode `json:"file_mode"` + + // Uid of the device. + Uid uint32 `json:"uid"` + + // Gid of the device. + Gid uint32 `json:"gid"` + + // Write the file to the allowed list + Allow bool `json:"allow"` +} + +func (d *Device) CgroupString() string { + return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) +} + +func (d *Device) Mkdev() int { + return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) +} + +// deviceNumberString converts the device number to a string return result. +func deviceNumberString(number int64) string { + if number == Wildcard { + return "*" + } + return fmt.Sprint(number) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go new file mode 100644 index 000000000..4d348d217 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go @@ -0,0 +1,111 @@ +// +build linux freebsd + +package configs + +var ( + // DefaultSimpleDevices are devices that are to be both allowed and created. + DefaultSimpleDevices = []*Device{ + // /dev/null and zero + { + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, + }, + + { + Path: "/dev/full", + Type: 'c', + Major: 1, + Minor: 7, + Permissions: "rwm", + FileMode: 0666, + }, + + // consoles and ttys + { + Path: "/dev/tty", + Type: 'c', + Major: 5, + Minor: 0, + Permissions: "rwm", + FileMode: 0666, + }, + + // /dev/urandom,/dev/random + { + Path: "/dev/urandom", + Type: 'c', + Major: 1, + Minor: 9, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/random", + Type: 'c', + Major: 1, + Minor: 8, + Permissions: "rwm", + FileMode: 0666, + }, + } + DefaultAllowedDevices = append([]*Device{ + // allow mknod for any device + { + Type: 'c', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + { + Type: 'b', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + + { + Path: "/dev/console", + Type: 'c', + Major: 5, + Minor: 1, + Permissions: "rwm", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Path: "", + Type: 'c', + Major: 136, + Minor: Wildcard, + Permissions: "rwm", + }, + { + Path: "", + Type: 'c', + Major: 5, + Minor: 2, + Permissions: "rwm", + }, + + // tuntap + { + Path: "", + Type: 'c', + Major: 10, + Minor: 200, + Permissions: "rwm", + }, + }, DefaultSimpleDevices...) + DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...) +) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go new file mode 100644 index 000000000..d30216380 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go @@ -0,0 +1,9 @@ +package configs + +type HugepageLimit struct { + // which type of hugepage to limit. + Pagesize string `json:"page_size"` + + // usage limit for hugepage. + Limit uint64 `json:"limit"` +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go new file mode 100644 index 000000000..9a0395eaf --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go @@ -0,0 +1,14 @@ +package configs + +import ( + "fmt" +) + +type IfPrioMap struct { + Interface string `json:"interface"` + Priority int64 `json:"priority"` +} + +func (i *IfPrioMap) CgroupString() string { + return fmt.Sprintf("%s %d", i.Interface, i.Priority) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go new file mode 100644 index 000000000..670757ddb --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -0,0 +1,39 @@ +package configs + +const ( + // EXT_COPYUP is a directive to copy up the contents of a directory when + // a tmpfs is mounted over it. + EXT_COPYUP = 1 << iota +) + +type Mount struct { + // Source path for the mount. + Source string `json:"source"` + + // Destination path for the mount inside the container. + Destination string `json:"destination"` + + // Device the mount is for. + Device string `json:"device"` + + // Mount flags. + Flags int `json:"flags"` + + // Propagation Flags + PropagationFlags []int `json:"propagation_flags"` + + // Mount data applied to the mount. + Data string `json:"data"` + + // Relabel source if set, "z" indicates shared, "Z" indicates unshared. + Relabel string `json:"relabel"` + + // Extensions are additional flags that are specific to runc. + Extensions int `json:"extensions"` + + // Optional Command to be run before Source is mounted. + PremountCmds []Command `json:"premount_cmds"` + + // Optional Command to be run after Source is mounted. + PostmountCmds []Command `json:"postmount_cmds"` +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go new file mode 100644 index 000000000..a3329a31a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go @@ -0,0 +1,5 @@ +package configs + +type NamespaceType string + +type Namespaces []Namespace diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go new file mode 100644 index 000000000..5fc171a57 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go @@ -0,0 +1,122 @@ +package configs + +import ( + "fmt" + "os" + "sync" +) + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" +) + +var ( + nsLock sync.Mutex + supportedNamespaces = make(map[NamespaceType]bool) +) + +// NsName converts the namespace type to its filename +func NsName(ns NamespaceType) string { + switch ns { + case NEWNET: + return "net" + case NEWNS: + return "mnt" + case NEWPID: + return "pid" + case NEWIPC: + return "ipc" + case NEWUSER: + return "user" + case NEWUTS: + return "uts" + } + return "" +} + +// IsNamespaceSupported returns whether a namespace is available or +// not +func IsNamespaceSupported(ns NamespaceType) bool { + nsLock.Lock() + defer nsLock.Unlock() + supported, ok := supportedNamespaces[ns] + if ok { + return supported + } + nsFile := NsName(ns) + // if the namespace type is unknown, just return false + if nsFile == "" { + return false + } + _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile)) + // a namespace is supported if it exists and we have permissions to read it + supported = err == nil + supportedNamespaces[ns] = supported + return supported +} + +func NamespaceTypes() []NamespaceType { + return []NamespaceType{ + NEWUSER, // Keep user NS always first, don't move it. + NEWIPC, + NEWUTS, + NEWNET, + NEWPID, + NEWNS, + } +} + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Type NamespaceType `json:"type"` + Path string `json:"path"` +} + +func (n *Namespace) GetPath(pid int) string { + return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type)) +} + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...) + return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 +} + +func (n *Namespaces) PathOf(t NamespaceType) string { + i := n.index(t) + if i == -1 { + return "" + } + return (*n)[i].Path +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go new file mode 100644 index 000000000..4ce6813d2 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -0,0 +1,31 @@ +// +build linux + +package configs + +import "golang.org/x/sys/unix" + +func (n *Namespace) Syscall() int { + return namespaceInfo[n.Type] +} + +var namespaceInfo = map[NamespaceType]int{ + NEWNET: unix.CLONE_NEWNET, + NEWNS: unix.CLONE_NEWNS, + NEWUSER: unix.CLONE_NEWUSER, + NEWIPC: unix.CLONE_NEWIPC, + NEWUTS: unix.CLONE_NEWUTS, + NEWPID: unix.CLONE_NEWPID, +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This function returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + var flag int + for _, v := range *n { + if v.Path != "" { + continue + } + flag |= namespaceInfo[v.Type] + } + return uintptr(flag) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go new file mode 100644 index 000000000..5d9a5c81f --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go @@ -0,0 +1,13 @@ +// +build !linux,!windows + +package configs + +func (n *Namespace) Syscall() int { + panic("No namespace syscall support") +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This function returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + panic("No namespace syscall support") +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go new file mode 100644 index 000000000..19bf713de --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go @@ -0,0 +1,8 @@ +// +build !linux + +package configs + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go new file mode 100644 index 000000000..ccdb228e1 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go @@ -0,0 +1,72 @@ +package configs + +// Network defines configuration for a container's networking stack +// +// The network configuration can be omitted from a container causing the +// container to be setup with the host's networking stack +type Network struct { + // Type sets the networks type, commonly veth and loopback + Type string `json:"type"` + + // Name of the network interface + Name string `json:"name"` + + // The bridge to use. + Bridge string `json:"bridge"` + + // MacAddress contains the MAC address to set on the network interface + MacAddress string `json:"mac_address"` + + // Address contains the IPv4 and mask to set on the network interface + Address string `json:"address"` + + // Gateway sets the gateway address that is used as the default for the interface + Gateway string `json:"gateway"` + + // IPv6Address contains the IPv6 and mask to set on the network interface + IPv6Address string `json:"ipv6_address"` + + // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface + IPv6Gateway string `json:"ipv6_gateway"` + + // Mtu sets the mtu value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + Mtu int `json:"mtu"` + + // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + TxQueueLen int `json:"txqueuelen"` + + // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the + // container. + HostInterfaceName string `json:"host_interface_name"` + + // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface + // bridge port in the case of type veth + // Note: This is unsupported on some systems. + // Note: This does not apply to loopback interfaces. + HairpinMode bool `json:"hairpin_mode"` +} + +// Routes can be specified to create entries in the route table as the container is started +// +// All of destination, source, and gateway should be either IPv4 or IPv6. +// One of the three options must be present, and omitted entries will use their +// IP family default for the route table. For IPv4 for example, setting the +// gateway to 1.2.3.4 and the interface to eth0 will set up a standard +// destination of 0.0.0.0(or *) when viewed in the route table. +type Route struct { + // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 + Destination string `json:"destination"` + + // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 + Source string `json:"source"` + + // Sets the gateway. Accepts IPv4 and IPv6 + Gateway string `json:"gateway"` + + // The device to set this route up for, for example: eth0 + InterfaceName string `json:"interface_name"` +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go new file mode 100644 index 000000000..0cebfaf80 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go @@ -0,0 +1,117 @@ +package validate + +import ( + "fmt" + "os" + "reflect" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + geteuid = os.Geteuid + getegid = os.Getegid +) + +func (v *ConfigValidator) rootless(config *configs.Config) error { + if err := rootlessMappings(config); err != nil { + return err + } + if err := rootlessMount(config); err != nil { + return err + } + // Currently, cgroups cannot effectively be used in rootless containers. + // The new cgroup namespace doesn't really help us either because it doesn't + // have nice interactions with the user namespace (we're working with upstream + // to fix this). + if err := rootlessCgroup(config); err != nil { + return err + } + + // XXX: We currently can't verify the user config at all, because + // configs.Config doesn't store the user-related configs. So this + // has to be verified by setupUser() in init_linux.go. + + return nil +} + +func rootlessMappings(config *configs.Config) error { + rootuid, err := config.HostRootUID() + if err != nil { + return fmt.Errorf("failed to get root uid from uidMappings: %v", err) + } + if euid := geteuid(); euid != 0 { + if !config.Namespaces.Contains(configs.NEWUSER) { + return fmt.Errorf("rootless containers require user namespaces") + } + if rootuid != euid { + return fmt.Errorf("rootless containers cannot map container root to a different host user") + } + } + + rootgid, err := config.HostRootGID() + if err != nil { + return fmt.Errorf("failed to get root gid from gidMappings: %v", err) + } + + // Similar to the above test, we need to make sure that we aren't trying to + // map to a group ID that we don't have the right to be. + if rootgid != getegid() { + return fmt.Errorf("rootless containers cannot map container root to a different host group") + } + + // We can only map one user and group inside a container (our own). + if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 { + return fmt.Errorf("rootless containers cannot map more than one user") + } + if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 { + return fmt.Errorf("rootless containers cannot map more than one group") + } + + return nil +} + +// cgroup verifies that the user isn't trying to set any cgroup limits or paths. +func rootlessCgroup(config *configs.Config) error { + // Nothing set at all. + if config.Cgroups == nil || config.Cgroups.Resources == nil { + return nil + } + + // Used for comparing to the zero value. + left := reflect.ValueOf(*config.Cgroups.Resources) + right := reflect.Zero(left.Type()) + + // This is all we need to do, since specconv won't add cgroup options in + // rootless mode. + if !reflect.DeepEqual(left.Interface(), right.Interface()) { + return fmt.Errorf("cannot specify resource limits in rootless container") + } + + return nil +} + +// mount verifies that the user isn't trying to set up any mounts they don't have +// the rights to do. In addition, it makes sure that no mount has a `uid=` or +// `gid=` option that doesn't resolve to root. +func rootlessMount(config *configs.Config) error { + // XXX: We could whitelist allowed devices at this point, but I'm not + // convinced that's a good idea. The kernel is the best arbiter of + // access control. + + for _, mount := range config.Mounts { + // Check that the options list doesn't contain any uid= or gid= entries + // that don't resolve to root. + for _, opt := range strings.Split(mount.Data, ",") { + if strings.HasPrefix(opt, "uid=") && opt != "uid=0" { + return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0") + } + if strings.HasPrefix(opt, "gid=") && opt != "gid=0" { + return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0") + } + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go new file mode 100644 index 000000000..828434544 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go @@ -0,0 +1,195 @@ +package validate + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + selinux "github.com/opencontainers/selinux/go-selinux" +) + +type Validator interface { + Validate(*configs.Config) error +} + +func New() Validator { + return &ConfigValidator{} +} + +type ConfigValidator struct { +} + +func (v *ConfigValidator) Validate(config *configs.Config) error { + if err := v.rootfs(config); err != nil { + return err + } + if err := v.network(config); err != nil { + return err + } + if err := v.hostname(config); err != nil { + return err + } + if err := v.security(config); err != nil { + return err + } + if err := v.usernamespace(config); err != nil { + return err + } + if err := v.sysctl(config); err != nil { + return err + } + if config.Rootless { + if err := v.rootless(config); err != nil { + return err + } + } + return nil +} + +// rootfs validates if the rootfs is an absolute path and is not a symlink +// to the container's root filesystem. +func (v *ConfigValidator) rootfs(config *configs.Config) error { + if _, err := os.Stat(config.Rootfs); err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs) + } + return err + } + cleaned, err := filepath.Abs(config.Rootfs) + if err != nil { + return err + } + if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { + return err + } + if filepath.Clean(config.Rootfs) != cleaned { + return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) + } + return nil +} + +func (v *ConfigValidator) network(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWNET) { + if len(config.Networks) > 0 || len(config.Routes) > 0 { + return fmt.Errorf("unable to apply network settings without a private NET namespace") + } + } + return nil +} + +func (v *ConfigValidator) hostname(config *configs.Config) error { + if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { + return fmt.Errorf("unable to set hostname without a private UTS namespace") + } + return nil +} + +func (v *ConfigValidator) security(config *configs.Config) error { + // restrict sys without mount namespace + if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && + !config.Namespaces.Contains(configs.NEWNS) { + return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") + } + if config.ProcessLabel != "" && !selinux.GetEnabled() { + return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported") + } + + return nil +} + +func (v *ConfigValidator) usernamespace(config *configs.Config) error { + if config.Namespaces.Contains(configs.NEWUSER) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + return fmt.Errorf("USER namespaces aren't enabled in the kernel") + } + } else { + if config.UidMappings != nil || config.GidMappings != nil { + return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config") + } + } + return nil +} + +// sysctl validates that the specified sysctl keys are valid or not. +// /proc/sys isn't completely namespaced and depending on which namespaces +// are specified, a subset of sysctls are permitted. +func (v *ConfigValidator) sysctl(config *configs.Config) error { + validSysctlMap := map[string]bool{ + "kernel.msgmax": true, + "kernel.msgmnb": true, + "kernel.msgmni": true, + "kernel.sem": true, + "kernel.shmall": true, + "kernel.shmmax": true, + "kernel.shmmni": true, + "kernel.shm_rmid_forced": true, + } + + for s := range config.Sysctl { + if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") { + if config.Namespaces.Contains(configs.NEWIPC) { + continue + } else { + return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s) + } + } + if strings.HasPrefix(s, "net.") { + if config.Namespaces.Contains(configs.NEWNET) { + if path := config.Namespaces.PathOf(configs.NEWNET); path != "" { + if err := checkHostNs(s, path); err != nil { + return err + } + } + continue + } else { + return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s) + } + } + return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s) + } + + return nil +} + +func isSymbolicLink(path string) (bool, error) { + fi, err := os.Lstat(path) + if err != nil { + return false, err + } + + return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil +} + +// checkHostNs checks whether network sysctl is used in host namespace. +func checkHostNs(sysctlConfig string, path string) error { + var currentProcessNetns = "/proc/self/ns/net" + // readlink on the current processes network namespace + destOfCurrentProcess, err := os.Readlink(currentProcessNetns) + if err != nil { + return fmt.Errorf("read soft link %q error", currentProcessNetns) + } + + // First check if the provided path is a symbolic link + symLink, err := isSymbolicLink(path) + if err != nil { + return fmt.Errorf("could not check that %q is a symlink: %v", path, err) + } + + if symLink == false { + // The provided namespace is not a symbolic link, + // it is not the host namespace. + return nil + } + + // readlink on the path provided in the struct + destOfContainer, err := os.Readlink(path) + if err != nil { + return fmt.Errorf("read soft link %q error", path) + } + if destOfContainer == destOfCurrentProcess { + return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig) + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console.go b/vendor/github.com/opencontainers/runc/libcontainer/console.go new file mode 100644 index 000000000..917acc702 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/console.go @@ -0,0 +1,17 @@ +package libcontainer + +import ( + "io" + "os" +) + +// Console represents a pseudo TTY. +type Console interface { + io.ReadWriteCloser + + // Path returns the filesystem path to the slave side of the pty. + Path() string + + // Fd returns the fd for the master of the pty. + File() *os.File +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go b/vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go new file mode 100644 index 000000000..b7166a31f --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go @@ -0,0 +1,13 @@ +// +build freebsd + +package libcontainer + +import ( + "errors" +) + +// newConsole returns an initialized console that can be used within a container by copying bytes +// from the master side to the slave that is attached as the tty for the container's init process. +func newConsole() (Console, error) { + return nil, errors.New("libcontainer console is not supported on FreeBSD") +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go new file mode 100644 index 000000000..f70de3848 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go @@ -0,0 +1,152 @@ +package libcontainer + +import ( + "fmt" + "os" + "unsafe" + + "golang.org/x/sys/unix" +) + +func ConsoleFromFile(f *os.File) Console { + return &linuxConsole{ + master: f, + } +} + +// newConsole returns an initialized console that can be used within a container by copying bytes +// from the master side to the slave that is attached as the tty for the container's init process. +func newConsole() (Console, error) { + master, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + console, err := ptsname(master) + if err != nil { + return nil, err + } + if err := unlockpt(master); err != nil { + return nil, err + } + return &linuxConsole{ + slavePath: console, + master: master, + }, nil +} + +// linuxConsole is a linux pseudo TTY for use within a container. +type linuxConsole struct { + master *os.File + slavePath string +} + +func (c *linuxConsole) File() *os.File { + return c.master +} + +func (c *linuxConsole) Path() string { + return c.slavePath +} + +func (c *linuxConsole) Read(b []byte) (int, error) { + return c.master.Read(b) +} + +func (c *linuxConsole) Write(b []byte) (int, error) { + return c.master.Write(b) +} + +func (c *linuxConsole) Close() error { + if m := c.master; m != nil { + return m.Close() + } + return nil +} + +// mount initializes the console inside the rootfs mounting with the specified mount label +// and applying the correct ownership of the console. +func (c *linuxConsole) mount() error { + oldMask := unix.Umask(0000) + defer unix.Umask(oldMask) + f, err := os.Create("/dev/console") + if err != nil && !os.IsExist(err) { + return err + } + if f != nil { + f.Close() + } + return unix.Mount(c.slavePath, "/dev/console", "bind", unix.MS_BIND, "") +} + +// dupStdio opens the slavePath for the console and dups the fds to the current +// processes stdio, fd 0,1,2. +func (c *linuxConsole) dupStdio() error { + slave, err := c.open(unix.O_RDWR) + if err != nil { + return err + } + fd := int(slave.Fd()) + for _, i := range []int{0, 1, 2} { + if err := unix.Dup3(fd, i, 0); err != nil { + return err + } + } + return nil +} + +// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave. +func (c *linuxConsole) open(flag int) (*os.File, error) { + r, e := unix.Open(c.slavePath, flag, 0) + if e != nil { + return nil, &os.PathError{ + Op: "open", + Path: c.slavePath, + Err: e, + } + } + return os.NewFile(uintptr(r), c.slavePath), nil +} + +func ioctl(fd uintptr, flag, data uintptr) error { + if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 { + return err + } + return nil +} + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +func unlockpt(f *os.File) error { + var u int32 + return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) +} + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) + if err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} + +// SaneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair +// created by us acts normally. In particular, a not-very-well-known default of +// Linux unix98 ptys is that they have +onlcr by default. While this isn't a +// problem for terminal emulators, because we relay data from the terminal we +// also relay that funky line discipline. +func SaneTerminal(terminal *os.File) error { + termios, err := unix.IoctlGetTermios(int(terminal.Fd()), unix.TCGETS) + if err != nil { + return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error()) + } + + // Set -onlcr so we don't have to deal with \r. + termios.Oflag &^= unix.ONLCR + + if err := unix.IoctlSetTermios(int(terminal.Fd()), unix.TCSETS, termios); err != nil { + return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error()) + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go b/vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go new file mode 100644 index 000000000..e5ca54599 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go @@ -0,0 +1,11 @@ +package libcontainer + +import ( + "errors" +) + +// newConsole returns an initialized console that can be used within a container by copying bytes +// from the master side to the slave that is attached as the tty for the container's init process. +func newConsole() (Console, error) { + return nil, errors.New("libcontainer console is not supported on Solaris") +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/console_windows.go new file mode 100644 index 000000000..c61e866a5 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/console_windows.go @@ -0,0 +1,30 @@ +package libcontainer + +// newConsole returns an initialized console that can be used within a container +func newConsole() (Console, error) { + return &windowsConsole{}, nil +} + +// windowsConsole is a Windows pseudo TTY for use within a container. +type windowsConsole struct { +} + +func (c *windowsConsole) Fd() uintptr { + return 0 +} + +func (c *windowsConsole) Path() string { + return "" +} + +func (c *windowsConsole) Read(b []byte) (int, error) { + return 0, nil +} + +func (c *windowsConsole) Write(b []byte) (int, error) { + return 0, nil +} + +func (c *windowsConsole) Close() error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container.go b/vendor/github.com/opencontainers/runc/libcontainer/container.go new file mode 100644 index 000000000..2e31b4d4f --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/container.go @@ -0,0 +1,166 @@ +// Package libcontainer provides a native Go implementation for creating containers +// with namespaces, cgroups, capabilities, and filesystem access controls. +// It allows you to manage the lifecycle of the container performing additional operations +// after the container is created. +package libcontainer + +import ( + "os" + "time" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +// Status is the status of a container. +type Status int + +const ( + // Created is the status that denotes the container exists but has not been run yet. + Created Status = iota + // Running is the status that denotes the container exists and is running. + Running + // Pausing is the status that denotes the container exists, it is in the process of being paused. + Pausing + // Paused is the status that denotes the container exists, but all its processes are paused. + Paused + // Stopped is the status that denotes the container does not have a created or running process. + Stopped +) + +func (s Status) String() string { + switch s { + case Created: + return "created" + case Running: + return "running" + case Pausing: + return "pausing" + case Paused: + return "paused" + case Stopped: + return "stopped" + default: + return "unknown" + } +} + +// BaseState represents the platform agnostic pieces relating to a +// running container's state +type BaseState struct { + // ID is the container ID. + ID string `json:"id"` + + // InitProcessPid is the init process id in the parent namespace. + InitProcessPid int `json:"init_process_pid"` + + // InitProcessStartTime is the init process start time in clock cycles since boot time. + InitProcessStartTime uint64 `json:"init_process_start"` + + // Created is the unix timestamp for the creation time of the container in UTC + Created time.Time `json:"created"` + + // Config is the container's configuration. + Config configs.Config `json:"config"` +} + +// BaseContainer is a libcontainer container object. +// +// Each container is thread-safe within the same process. Since a container can +// be destroyed by a separate process, any function may return that the container +// was not found. BaseContainer includes methods that are platform agnostic. +type BaseContainer interface { + // Returns the ID of the container + ID() string + + // Returns the current status of the container. + // + // errors: + // ContainerNotExists - Container no longer exists, + // Systemerror - System error. + Status() (Status, error) + + // State returns the current container's state information. + // + // errors: + // SystemError - System error. + State() (*State, error) + + // Returns the current config of the container. + Config() configs.Config + + // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. + // + // errors: + // ContainerNotExists - Container no longer exists, + // Systemerror - System error. + // + // Some of the returned PIDs may no longer refer to processes in the Container, unless + // the Container state is PAUSED in which case every PID in the slice is valid. + Processes() ([]int, error) + + // Returns statistics for the container. + // + // errors: + // ContainerNotExists - Container no longer exists, + // Systemerror - System error. + Stats() (*Stats, error) + + // Set resources of container as configured + // + // We can use this to change resources when containers are running. + // + // errors: + // SystemError - System error. + Set(config configs.Config) error + + // Start a process inside the container. Returns error if process fails to + // start. You can track process lifecycle with passed Process structure. + // + // errors: + // ContainerNotExists - Container no longer exists, + // ConfigInvalid - config is invalid, + // ContainerPaused - Container is paused, + // SystemError - System error. + Start(process *Process) (err error) + + // Run immediately starts the process inside the container. Returns error if process + // fails to start. It does not block waiting for the exec fifo after start returns but + // opens the fifo after start returns. + // + // errors: + // ContainerNotExists - Container no longer exists, + // ConfigInvalid - config is invalid, + // ContainerPaused - Container is paused, + // SystemError - System error. + Run(process *Process) (err error) + + // Destroys the container, if its in a valid state, after killing any + // remaining running processes. + // + // Any event registrations are removed before the container is destroyed. + // No error is returned if the container is already destroyed. + // + // Running containers must first be stopped using Signal(..). + // Paused containers must first be resumed using Resume(..). + // + // errors: + // ContainerNotStopped - Container is still running, + // ContainerPaused - Container is paused, + // SystemError - System error. + Destroy() error + + // Signal sends the provided signal code to the container's initial process. + // + // If all is specified the signal is sent to all processes in the container + // including the initial process. + // + // errors: + // SystemError - System error. + Signal(s os.Signal, all bool) error + + // Exec signals the container to exec the users process at the end of the init. + // + // errors: + // SystemError - System error. + Exec() error +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go new file mode 100644 index 000000000..d7e7516e5 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go @@ -0,0 +1,1654 @@ +// +build linux + +package libcontainer + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "net" + "os" + "os/exec" + "path/filepath" + "reflect" + "strings" + "sync" + "syscall" // only for SysProcAttr and Signal + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/criurpc" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" + + "github.com/golang/protobuf/proto" + "github.com/sirupsen/logrus" + "github.com/syndtr/gocapability/capability" + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +const stdioFdCount = 3 + +type linuxContainer struct { + id string + root string + config *configs.Config + cgroupManager cgroups.Manager + initArgs []string + initProcess parentProcess + initProcessStartTime uint64 + criuPath string + m sync.Mutex + criuVersion int + state containerState + created time.Time +} + +// State represents a running container's state +type State struct { + BaseState + + // Platform specific fields below here + + // Specifies if the container was started under the rootless mode. + Rootless bool `json:"rootless"` + + // Path to all the cgroups setup for a container. Key is cgroup subsystem name + // with the value as the path. + CgroupPaths map[string]string `json:"cgroup_paths"` + + // NamespacePaths are filepaths to the container's namespaces. Key is the namespace type + // with the value as the path. + NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"` + + // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore + ExternalDescriptors []string `json:"external_descriptors,omitempty"` +} + +// Container is a libcontainer container object. +// +// Each container is thread-safe within the same process. Since a container can +// be destroyed by a separate process, any function may return that the container +// was not found. +type Container interface { + BaseContainer + + // Methods below here are platform specific + + // Checkpoint checkpoints the running container's state to disk using the criu(8) utility. + // + // errors: + // Systemerror - System error. + Checkpoint(criuOpts *CriuOpts) error + + // Restore restores the checkpointed container to a running state using the criu(8) utility. + // + // errors: + // Systemerror - System error. + Restore(process *Process, criuOpts *CriuOpts) error + + // If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses + // the execution of any user processes. Asynchronously, when the container finished being paused the + // state is changed to PAUSED. + // If the Container state is PAUSED, do nothing. + // + // errors: + // ContainerNotExists - Container no longer exists, + // ContainerNotRunning - Container not running or created, + // Systemerror - System error. + Pause() error + + // If the Container state is PAUSED, resumes the execution of any user processes in the + // Container before setting the Container state to RUNNING. + // If the Container state is RUNNING, do nothing. + // + // errors: + // ContainerNotExists - Container no longer exists, + // ContainerNotPaused - Container is not paused, + // Systemerror - System error. + Resume() error + + // NotifyOOM returns a read-only channel signaling when the container receives an OOM notification. + // + // errors: + // Systemerror - System error. + NotifyOOM() (<-chan struct{}, error) + + // NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level + // + // errors: + // Systemerror - System error. + NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) +} + +// ID returns the container's unique ID +func (c *linuxContainer) ID() string { + return c.id +} + +// Config returns the container's configuration +func (c *linuxContainer) Config() configs.Config { + return *c.config +} + +func (c *linuxContainer) Status() (Status, error) { + c.m.Lock() + defer c.m.Unlock() + return c.currentStatus() +} + +func (c *linuxContainer) State() (*State, error) { + c.m.Lock() + defer c.m.Unlock() + return c.currentState() +} + +func (c *linuxContainer) Processes() ([]int, error) { + pids, err := c.cgroupManager.GetAllPids() + if err != nil { + return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups") + } + return pids, nil +} + +func (c *linuxContainer) Stats() (*Stats, error) { + var ( + err error + stats = &Stats{} + ) + if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { + return stats, newSystemErrorWithCause(err, "getting container stats from cgroups") + } + for _, iface := range c.config.Networks { + switch iface.Type { + case "veth": + istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) + if err != nil { + return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName) + } + stats.Interfaces = append(stats.Interfaces, istats) + } + } + return stats, nil +} + +func (c *linuxContainer) Set(config configs.Config) error { + c.m.Lock() + defer c.m.Unlock() + status, err := c.currentStatus() + if err != nil { + return err + } + if status == Stopped { + return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning) + } + c.config = &config + return c.cgroupManager.Set(c.config) +} + +func (c *linuxContainer) Start(process *Process) error { + c.m.Lock() + defer c.m.Unlock() + status, err := c.currentStatus() + if err != nil { + return err + } + if status == Stopped { + if err := c.createExecFifo(); err != nil { + return err + } + } + if err := c.start(process, status == Stopped); err != nil { + if status == Stopped { + c.deleteExecFifo() + } + return err + } + return nil +} + +func (c *linuxContainer) Run(process *Process) error { + c.m.Lock() + status, err := c.currentStatus() + if err != nil { + c.m.Unlock() + return err + } + c.m.Unlock() + if err := c.Start(process); err != nil { + return err + } + if status == Stopped { + return c.exec() + } + return nil +} + +func (c *linuxContainer) Exec() error { + c.m.Lock() + defer c.m.Unlock() + return c.exec() +} + +func (c *linuxContainer) exec() error { + path := filepath.Join(c.root, execFifoFilename) + f, err := os.OpenFile(path, os.O_RDONLY, 0) + if err != nil { + return newSystemErrorWithCause(err, "open exec fifo for reading") + } + defer f.Close() + data, err := ioutil.ReadAll(f) + if err != nil { + return err + } + if len(data) > 0 { + os.Remove(path) + return nil + } + return fmt.Errorf("cannot start an already running container") +} + +func (c *linuxContainer) start(process *Process, isInit bool) error { + parent, err := c.newParentProcess(process, isInit) + if err != nil { + return newSystemErrorWithCause(err, "creating new parent process") + } + if err := parent.start(); err != nil { + // terminate the process to ensure that it properly is reaped. + if err := parent.terminate(); err != nil { + logrus.Warn(err) + } + return newSystemErrorWithCause(err, "starting container process") + } + // generate a timestamp indicating when the container was started + c.created = time.Now().UTC() + if isInit { + c.state = &createdState{ + c: c, + } + state, err := c.updateState(parent) + if err != nil { + return err + } + c.initProcessStartTime = state.InitProcessStartTime + + if c.config.Hooks != nil { + s := configs.HookState{ + Version: c.config.Version, + ID: c.id, + Pid: parent.pid(), + Bundle: utils.SearchLabels(c.config.Labels, "bundle"), + } + for i, hook := range c.config.Hooks.Poststart { + if err := hook.Run(s); err != nil { + if err := parent.terminate(); err != nil { + logrus.Warn(err) + } + return newSystemErrorWithCausef(err, "running poststart hook %d", i) + } + } + } + } else { + c.state = &runningState{ + c: c, + } + } + return nil +} + +func (c *linuxContainer) Signal(s os.Signal, all bool) error { + if all { + return signalAllProcesses(c.cgroupManager, s) + } + if err := c.initProcess.signal(s); err != nil { + return newSystemErrorWithCause(err, "signaling init process") + } + return nil +} + +func (c *linuxContainer) createExecFifo() error { + rootuid, err := c.Config().HostRootUID() + if err != nil { + return err + } + rootgid, err := c.Config().HostRootGID() + if err != nil { + return err + } + + fifoName := filepath.Join(c.root, execFifoFilename) + if _, err := os.Stat(fifoName); err == nil { + return fmt.Errorf("exec fifo %s already exists", fifoName) + } + oldMask := unix.Umask(0000) + if err := unix.Mkfifo(fifoName, 0622); err != nil { + unix.Umask(oldMask) + return err + } + unix.Umask(oldMask) + if err := os.Chown(fifoName, rootuid, rootgid); err != nil { + return err + } + return nil +} + +func (c *linuxContainer) deleteExecFifo() { + fifoName := filepath.Join(c.root, execFifoFilename) + os.Remove(fifoName) +} + +func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) { + parentPipe, childPipe, err := utils.NewSockPair("init") + if err != nil { + return nil, newSystemErrorWithCause(err, "creating new init pipe") + } + cmd, err := c.commandTemplate(p, childPipe) + if err != nil { + return nil, newSystemErrorWithCause(err, "creating new command template") + } + if !doInit { + return c.newSetnsProcess(p, cmd, parentPipe, childPipe) + } + + // We only set up rootDir if we're not doing a `runc exec`. The reason for + // this is to avoid cases where a racing, unprivileged process inside the + // container can get access to the statedir file descriptor (which would + // allow for container rootfs escape). + rootDir, err := os.Open(c.root) + if err != nil { + return nil, err + } + cmd.ExtraFiles = append(cmd.ExtraFiles, rootDir) + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1)) + return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir) +} + +func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) { + cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) + cmd.Stdin = p.Stdin + cmd.Stdout = p.Stdout + cmd.Stderr = p.Stderr + cmd.Dir = c.config.Rootfs + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...) + if p.ConsoleSocket != nil { + cmd.ExtraFiles = append(cmd.ExtraFiles, p.ConsoleSocket) + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_CONSOLE=%d", stdioFdCount+len(cmd.ExtraFiles)-1), + ) + } + cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe) + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1), + ) + // NOTE: when running a container with no PID namespace and the parent process spawning the container is + // PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason + // even with the parent still running. + if c.config.ParentDeathSignal > 0 { + cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal) + } + return cmd, nil +} + +func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) { + cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard)) + nsMaps := make(map[configs.NamespaceType]string) + for _, ns := range c.config.Namespaces { + if ns.Path != "" { + nsMaps[ns.Type] = ns.Path + } + } + _, sharePidns := nsMaps[configs.NEWPID] + data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps) + if err != nil { + return nil, err + } + return &initProcess{ + cmd: cmd, + childPipe: childPipe, + parentPipe: parentPipe, + manager: c.cgroupManager, + config: c.newInitConfig(p), + container: c, + process: p, + bootstrapData: data, + sharePidns: sharePidns, + rootDir: rootDir, + }, nil +} + +func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) { + cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns)) + state, err := c.currentState() + if err != nil { + return nil, newSystemErrorWithCause(err, "getting container's current state") + } + // for setns process, we don't have to set cloneflags as the process namespaces + // will only be set via setns syscall + data, err := c.bootstrapData(0, state.NamespacePaths) + if err != nil { + return nil, err + } + return &setnsProcess{ + cmd: cmd, + cgroupPaths: c.cgroupManager.GetPaths(), + childPipe: childPipe, + parentPipe: parentPipe, + config: c.newInitConfig(p), + process: p, + bootstrapData: data, + }, nil +} + +func (c *linuxContainer) newInitConfig(process *Process) *initConfig { + cfg := &initConfig{ + Config: c.config, + Args: process.Args, + Env: process.Env, + User: process.User, + AdditionalGroups: process.AdditionalGroups, + Cwd: process.Cwd, + Capabilities: process.Capabilities, + PassedFilesCount: len(process.ExtraFiles), + ContainerId: c.ID(), + NoNewPrivileges: c.config.NoNewPrivileges, + Rootless: c.config.Rootless, + AppArmorProfile: c.config.AppArmorProfile, + ProcessLabel: c.config.ProcessLabel, + Rlimits: c.config.Rlimits, + } + if process.NoNewPrivileges != nil { + cfg.NoNewPrivileges = *process.NoNewPrivileges + } + if process.AppArmorProfile != "" { + cfg.AppArmorProfile = process.AppArmorProfile + } + if process.Label != "" { + cfg.ProcessLabel = process.Label + } + if len(process.Rlimits) > 0 { + cfg.Rlimits = process.Rlimits + } + cfg.CreateConsole = process.ConsoleSocket != nil + return cfg +} + +func (c *linuxContainer) Destroy() error { + c.m.Lock() + defer c.m.Unlock() + return c.state.destroy() +} + +func (c *linuxContainer) Pause() error { + c.m.Lock() + defer c.m.Unlock() + status, err := c.currentStatus() + if err != nil { + return err + } + switch status { + case Running, Created: + if err := c.cgroupManager.Freeze(configs.Frozen); err != nil { + return err + } + return c.state.transition(&pausedState{ + c: c, + }) + } + return newGenericError(fmt.Errorf("container not running or created: %s", status), ContainerNotRunning) +} + +func (c *linuxContainer) Resume() error { + c.m.Lock() + defer c.m.Unlock() + status, err := c.currentStatus() + if err != nil { + return err + } + if status != Paused { + return newGenericError(fmt.Errorf("container not paused"), ContainerNotPaused) + } + if err := c.cgroupManager.Freeze(configs.Thawed); err != nil { + return err + } + return c.state.transition(&runningState{ + c: c, + }) +} + +func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { + // XXX(cyphar): This requires cgroups. + if c.config.Rootless { + return nil, fmt.Errorf("cannot get OOM notifications from rootless container") + } + return notifyOnOOM(c.cgroupManager.GetPaths()) +} + +func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { + // XXX(cyphar): This requires cgroups. + if c.config.Rootless { + return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container") + } + return notifyMemoryPressure(c.cgroupManager.GetPaths(), level) +} + +var criuFeatures *criurpc.CriuFeatures + +func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error { + + var t criurpc.CriuReqType + t = criurpc.CriuReqType_FEATURE_CHECK + + // criu 1.8 => 10800 + if err := c.checkCriuVersion(10800); err != nil { + // Feature checking was introduced with CRIU 1.8. + // Ignore the feature check if an older CRIU version is used + // and just act as before. + // As all automated PR testing is done using CRIU 1.7 this + // code will not be tested by automated PR testing. + return nil + } + + // make sure the features we are looking for are really not from + // some previous check + criuFeatures = nil + + req := &criurpc.CriuReq{ + Type: &t, + // Theoretically this should not be necessary but CRIU + // segfaults if Opts is empty. + // Fixed in CRIU 2.12 + Opts: rpcOpts, + Features: criuFeat, + } + + err := c.criuSwrk(nil, req, criuOpts, false) + if err != nil { + logrus.Debugf("%s", err) + return fmt.Errorf("CRIU feature check failed") + } + + logrus.Debugf("Feature check says: %s", criuFeatures) + missingFeatures := false + + if *criuFeat.MemTrack && !*criuFeatures.MemTrack { + missingFeatures = true + logrus.Debugf("CRIU does not support MemTrack") + } + + if missingFeatures { + return fmt.Errorf("CRIU is missing features") + } + + return nil +} + +func parseCriuVersion(path string) (int, error) { + var x, y, z int + + out, err := exec.Command(path, "-V").Output() + if err != nil { + return 0, fmt.Errorf("Unable to execute CRIU command: %s", path) + } + + x = 0 + y = 0 + z = 0 + if ep := strings.Index(string(out), "-"); ep >= 0 { + // criu Git version format + var version string + if sp := strings.Index(string(out), "GitID"); sp > 0 { + version = string(out)[sp:ep] + } else { + return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path) + } + + n, err := fmt.Sscanf(string(version), "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2 + if err != nil { + n, err = fmt.Sscanf(string(version), "GitID: v%d.%d", &x, &y) // 1.6 + y++ + } else { + z++ + } + if n < 2 || err != nil { + return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err) + } + } else { + // criu release version format + n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2 + if err != nil { + n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6 + } + if n < 2 || err != nil { + return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err) + } + } + + return x*10000 + y*100 + z, nil +} + +func compareCriuVersion(criuVersion int, minVersion int) error { + // simple function to perform the actual version compare + if criuVersion < minVersion { + return fmt.Errorf("CRIU version %d must be %d or higher", criuVersion, minVersion) + } + + return nil +} + +// This is used to store the result of criu version RPC +var criuVersionRPC *criurpc.CriuVersion + +// checkCriuVersion checks Criu version greater than or equal to minVersion +func (c *linuxContainer) checkCriuVersion(minVersion int) error { + + // If the version of criu has already been determined there is no need + // to ask criu for the version again. Use the value from c.criuVersion. + if c.criuVersion != 0 { + return compareCriuVersion(c.criuVersion, minVersion) + } + + // First try if this version of CRIU support the version RPC. + // The CRIU version RPC was introduced with CRIU 3.0. + + // First, reset the variable for the RPC answer to nil + criuVersionRPC = nil + + var t criurpc.CriuReqType + t = criurpc.CriuReqType_VERSION + req := &criurpc.CriuReq{ + Type: &t, + } + + err := c.criuSwrk(nil, req, nil, false) + if err != nil { + return fmt.Errorf("CRIU version check failed: %s", err) + } + + if criuVersionRPC != nil { + logrus.Debugf("CRIU version: %s", criuVersionRPC) + // major and minor are always set + c.criuVersion = int(*criuVersionRPC.Major) * 10000 + c.criuVersion += int(*criuVersionRPC.Minor) * 100 + if criuVersionRPC.Sublevel != nil { + c.criuVersion += int(*criuVersionRPC.Sublevel) + } + if criuVersionRPC.Gitid != nil { + // runc's convention is that a CRIU git release is + // always the same as increasing the minor by 1 + c.criuVersion -= (c.criuVersion % 100) + c.criuVersion += 100 + } + return compareCriuVersion(c.criuVersion, minVersion) + } + + // This is CRIU without the version RPC and therefore + // older than 3.0. Parsing the output is required. + + // This can be remove once runc does not work with criu older than 3.0 + + c.criuVersion, err = parseCriuVersion(c.criuPath) + if err != nil { + return err + } + + return compareCriuVersion(c.criuVersion, minVersion) +} + +const descriptorsFilename = "descriptors.json" + +func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) { + mountDest := m.Destination + if strings.HasPrefix(mountDest, c.config.Rootfs) { + mountDest = mountDest[len(c.config.Rootfs):] + } + + extMnt := &criurpc.ExtMountMap{ + Key: proto.String(mountDest), + Val: proto.String(mountDest), + } + req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) +} + +func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error { + for _, path := range c.config.MaskPaths { + fi, err := os.Stat(fmt.Sprintf("/proc/%d/root/%s", c.initProcess.pid(), path)) + if err != nil { + if os.IsNotExist(err) { + continue + } + return err + } + if fi.IsDir() { + continue + } + + extMnt := &criurpc.ExtMountMap{ + Key: proto.String(path), + Val: proto.String("/dev/null"), + } + req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) + } + + return nil +} + +func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { + c.m.Lock() + defer c.m.Unlock() + + // TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has + // support for doing unprivileged dumps, but the setup of + // rootless containers might make this complicated. + if c.config.Rootless { + return fmt.Errorf("cannot checkpoint a rootless container") + } + + // criu 1.5.2 => 10502 + if err := c.checkCriuVersion(10502); err != nil { + return err + } + + if criuOpts.ImagesDirectory == "" { + return fmt.Errorf("invalid directory to save checkpoint") + } + + // Since a container can be C/R'ed multiple times, + // the checkpoint directory may already exist. + if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) { + return err + } + + if criuOpts.WorkDirectory == "" { + criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") + } + + if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) { + return err + } + + workDir, err := os.Open(criuOpts.WorkDirectory) + if err != nil { + return err + } + defer workDir.Close() + + imageDir, err := os.Open(criuOpts.ImagesDirectory) + if err != nil { + return err + } + defer imageDir.Close() + + rpcOpts := criurpc.CriuOpts{ + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + WorkDirFd: proto.Int32(int32(workDir.Fd())), + LogLevel: proto.Int32(4), + LogFile: proto.String("dump.log"), + Root: proto.String(c.config.Rootfs), + ManageCgroups: proto.Bool(true), + NotifyScripts: proto.Bool(true), + Pid: proto.Int32(int32(c.initProcess.pid())), + ShellJob: proto.Bool(criuOpts.ShellJob), + LeaveRunning: proto.Bool(criuOpts.LeaveRunning), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), + } + + fcg := c.cgroupManager.GetPaths()["freezer"] + if fcg != "" { + rpcOpts.FreezeCgroup = proto.String(fcg) + } + + // append optional criu opts, e.g., page-server and port + if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 { + rpcOpts.Ps = &criurpc.CriuPageServerInfo{ + Address: proto.String(criuOpts.PageServer.Address), + Port: proto.Int32(criuOpts.PageServer.Port), + } + } + + //pre-dump may need parentImage param to complete iterative migration + if criuOpts.ParentImage != "" { + rpcOpts.ParentImg = proto.String(criuOpts.ParentImage) + rpcOpts.TrackMem = proto.Bool(true) + } + + // append optional manage cgroups mode + if criuOpts.ManageCgroupsMode != 0 { + // criu 1.7 => 10700 + if err := c.checkCriuVersion(10700); err != nil { + return err + } + mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode) + rpcOpts.ManageCgroupsMode = &mode + } + + var t criurpc.CriuReqType + if criuOpts.PreDump { + feat := criurpc.CriuFeatures{ + MemTrack: proto.Bool(true), + } + + if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil { + return err + } + + t = criurpc.CriuReqType_PRE_DUMP + } else { + t = criurpc.CriuReqType_DUMP + } + req := &criurpc.CriuReq{ + Type: &t, + Opts: &rpcOpts, + } + + //no need to dump these information in pre-dump + if !criuOpts.PreDump { + for _, m := range c.config.Mounts { + switch m.Device { + case "bind": + c.addCriuDumpMount(req, m) + case "cgroup": + binds, err := getCgroupMounts(m) + if err != nil { + return err + } + for _, b := range binds { + c.addCriuDumpMount(req, b) + } + } + } + + if err := c.addMaskPaths(req); err != nil { + return err + } + + for _, node := range c.config.Devices { + m := &configs.Mount{Destination: node.Path, Source: node.Path} + c.addCriuDumpMount(req, m) + } + + // Write the FD info to a file in the image directory + fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors()) + if err != nil { + return err + } + + err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655) + if err != nil { + return err + } + } + + err = c.criuSwrk(nil, req, criuOpts, false) + if err != nil { + return err + } + return nil +} + +func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) { + mountDest := m.Destination + if strings.HasPrefix(mountDest, c.config.Rootfs) { + mountDest = mountDest[len(c.config.Rootfs):] + } + + extMnt := &criurpc.ExtMountMap{ + Key: proto.String(mountDest), + Val: proto.String(m.Source), + } + req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) +} + +func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) { + for _, iface := range c.config.Networks { + switch iface.Type { + case "veth": + veth := new(criurpc.CriuVethPair) + veth.IfOut = proto.String(iface.HostInterfaceName) + veth.IfIn = proto.String(iface.Name) + req.Opts.Veths = append(req.Opts.Veths, veth) + case "loopback": + // Do nothing + } + } + for _, i := range criuOpts.VethPairs { + veth := new(criurpc.CriuVethPair) + veth.IfOut = proto.String(i.HostInterfaceName) + veth.IfIn = proto.String(i.ContainerInterfaceName) + req.Opts.Veths = append(req.Opts.Veths, veth) + } +} + +func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { + c.m.Lock() + defer c.m.Unlock() + + // TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have + // support for unprivileged restore at the moment. + if c.config.Rootless { + return fmt.Errorf("cannot restore a rootless container") + } + + // criu 1.5.2 => 10502 + if err := c.checkCriuVersion(10502); err != nil { + return err + } + if criuOpts.WorkDirectory == "" { + criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") + } + // Since a container can be C/R'ed multiple times, + // the work directory may already exist. + if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) { + return err + } + workDir, err := os.Open(criuOpts.WorkDirectory) + if err != nil { + return err + } + defer workDir.Close() + if criuOpts.ImagesDirectory == "" { + return fmt.Errorf("invalid directory to restore checkpoint") + } + imageDir, err := os.Open(criuOpts.ImagesDirectory) + if err != nil { + return err + } + defer imageDir.Close() + // CRIU has a few requirements for a root directory: + // * it must be a mount point + // * its parent must not be overmounted + // c.config.Rootfs is bind-mounted to a temporary directory + // to satisfy these requirements. + root := filepath.Join(c.root, "criu-root") + if err := os.Mkdir(root, 0755); err != nil { + return err + } + defer os.Remove(root) + root, err = filepath.EvalSymlinks(root) + if err != nil { + return err + } + err = unix.Mount(c.config.Rootfs, root, "", unix.MS_BIND|unix.MS_REC, "") + if err != nil { + return err + } + defer unix.Unmount(root, unix.MNT_DETACH) + t := criurpc.CriuReqType_RESTORE + req := &criurpc.CriuReq{ + Type: &t, + Opts: &criurpc.CriuOpts{ + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + WorkDirFd: proto.Int32(int32(workDir.Fd())), + EvasiveDevices: proto.Bool(true), + LogLevel: proto.Int32(4), + LogFile: proto.String("restore.log"), + RstSibling: proto.Bool(true), + Root: proto.String(root), + ManageCgroups: proto.Bool(true), + NotifyScripts: proto.Bool(true), + ShellJob: proto.Bool(criuOpts.ShellJob), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), + }, + } + + for _, m := range c.config.Mounts { + switch m.Device { + case "bind": + c.addCriuRestoreMount(req, m) + case "cgroup": + binds, err := getCgroupMounts(m) + if err != nil { + return err + } + for _, b := range binds { + c.addCriuRestoreMount(req, b) + } + } + } + + if len(c.config.MaskPaths) > 0 { + m := &configs.Mount{Destination: "/dev/null", Source: "/dev/null"} + c.addCriuRestoreMount(req, m) + } + + for _, node := range c.config.Devices { + m := &configs.Mount{Destination: node.Path, Source: node.Path} + c.addCriuRestoreMount(req, m) + } + + if criuOpts.EmptyNs&unix.CLONE_NEWNET == 0 { + c.restoreNetwork(req, criuOpts) + } + + // append optional manage cgroups mode + if criuOpts.ManageCgroupsMode != 0 { + // criu 1.7 => 10700 + if err := c.checkCriuVersion(10700); err != nil { + return err + } + mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode) + req.Opts.ManageCgroupsMode = &mode + } + + var ( + fds []string + fdJSON []byte + ) + if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil { + return err + } + + if err := json.Unmarshal(fdJSON, &fds); err != nil { + return err + } + for i := range fds { + if s := fds[i]; strings.Contains(s, "pipe:") { + inheritFd := new(criurpc.InheritFd) + inheritFd.Key = proto.String(s) + inheritFd.Fd = proto.Int32(int32(i)) + req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd) + } + } + return c.criuSwrk(process, req, criuOpts, true) +} + +func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { + // XXX: Do we need to deal with this case? AFAIK criu still requires root. + if err := c.cgroupManager.Apply(pid); err != nil { + return err + } + + if err := c.cgroupManager.Set(c.config); err != nil { + return newSystemError(err) + } + + path := fmt.Sprintf("/proc/%d/cgroup", pid) + cgroupsPaths, err := cgroups.ParseCgroupFile(path) + if err != nil { + return err + } + + for c, p := range cgroupsPaths { + cgroupRoot := &criurpc.CgroupRoot{ + Ctrl: proto.String(c), + Path: proto.String(p), + } + req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot) + } + + return nil +} + +func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) + if err != nil { + return err + } + + var logPath string + if opts != nil { + logPath = filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile()) + } else { + // For the VERSION RPC 'opts' is set to 'nil' and therefore + // opts.WorkDirectory does not exist. Set logPath to "". + logPath = "" + } + criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") + criuClientFileCon, err := net.FileConn(criuClient) + criuClient.Close() + if err != nil { + return err + } + + criuClientCon := criuClientFileCon.(*net.UnixConn) + defer criuClientCon.Close() + + criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") + defer criuServer.Close() + + args := []string{"swrk", "3"} + if c.criuVersion != 0 { + // If the CRIU Version is still '0' then this is probably + // the initial CRIU run to detect the version. Skip it. + logrus.Debugf("Using CRIU %d at: %s", c.criuVersion, c.criuPath) + } + logrus.Debugf("Using CRIU with following args: %s", args) + cmd := exec.Command(c.criuPath, args...) + if process != nil { + cmd.Stdin = process.Stdin + cmd.Stdout = process.Stdout + cmd.Stderr = process.Stderr + } + cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer) + + if err := cmd.Start(); err != nil { + return err + } + criuServer.Close() + + defer func() { + criuClientCon.Close() + _, err := cmd.Process.Wait() + if err != nil { + return + } + }() + + if applyCgroups { + err := c.criuApplyCgroups(cmd.Process.Pid, req) + if err != nil { + return err + } + } + + var extFds []string + if process != nil { + extFds, err = getPipeFds(cmd.Process.Pid) + if err != nil { + return err + } + } + + logrus.Debugf("Using CRIU in %s mode", req.GetType().String()) + // In the case of criurpc.CriuReqType_FEATURE_CHECK req.GetOpts() + // should be empty. For older CRIU versions it still will be + // available but empty. criurpc.CriuReqType_VERSION actually + // has no req.GetOpts(). + if !(req.GetType() == criurpc.CriuReqType_FEATURE_CHECK || + req.GetType() == criurpc.CriuReqType_VERSION) { + + val := reflect.ValueOf(req.GetOpts()) + v := reflect.Indirect(val) + for i := 0; i < v.NumField(); i++ { + st := v.Type() + name := st.Field(i).Name + if strings.HasPrefix(name, "XXX_") { + continue + } + value := val.MethodByName("Get" + name).Call([]reflect.Value{}) + logrus.Debugf("CRIU option %s with value %v", name, value[0]) + } + } + data, err := proto.Marshal(req) + if err != nil { + return err + } + _, err = criuClientCon.Write(data) + if err != nil { + return err + } + + buf := make([]byte, 10*4096) + oob := make([]byte, 4096) + for true { + n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob) + if err != nil { + return err + } + if n == 0 { + return fmt.Errorf("unexpected EOF") + } + if n == len(buf) { + return fmt.Errorf("buffer is too small") + } + + resp := new(criurpc.CriuResp) + err = proto.Unmarshal(buf[:n], resp) + if err != nil { + return err + } + if !resp.GetSuccess() { + typeString := req.GetType().String() + if typeString == "VERSION" { + // If the VERSION RPC fails this probably means that the CRIU + // version is too old for this RPC. Just return 'nil'. + return nil + } + return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath) + } + + t := resp.GetType() + switch { + case t == criurpc.CriuReqType_VERSION: + logrus.Debugf("CRIU version: %s", resp) + criuVersionRPC = resp.GetVersion() + break + case t == criurpc.CriuReqType_FEATURE_CHECK: + logrus.Debugf("Feature check says: %s", resp) + criuFeatures = resp.GetFeatures() + case t == criurpc.CriuReqType_NOTIFY: + if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil { + return err + } + t = criurpc.CriuReqType_NOTIFY + req = &criurpc.CriuReq{ + Type: &t, + NotifySuccess: proto.Bool(true), + } + data, err = proto.Marshal(req) + if err != nil { + return err + } + _, err = criuClientCon.Write(data) + if err != nil { + return err + } + continue + case t == criurpc.CriuReqType_RESTORE: + case t == criurpc.CriuReqType_DUMP: + case t == criurpc.CriuReqType_PRE_DUMP: + default: + return fmt.Errorf("unable to parse the response %s", resp.String()) + } + + break + } + + criuClientCon.CloseWrite() + // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. + // Here we want to wait only the CRIU process. + st, err := cmd.Process.Wait() + if err != nil { + return err + } + + // In pre-dump mode CRIU is in a loop and waits for + // the final DUMP command. + // The current runc pre-dump approach, however, is + // start criu in PRE_DUMP once for a single pre-dump + // and not the whole series of pre-dump, pre-dump, ...m, dump + // If we got the message CriuReqType_PRE_DUMP it means + // CRIU was successful and we need to forcefully stop CRIU + if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP { + return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath) + } + return nil +} + +// block any external network activity +func lockNetwork(config *configs.Config) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + + if err := strategy.detach(config); err != nil { + return err + } + } + return nil +} + +func unlockNetwork(config *configs.Config) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + if err = strategy.attach(config); err != nil { + return err + } + } + return nil +} + +func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error { + notify := resp.GetNotify() + if notify == nil { + return fmt.Errorf("invalid response: %s", resp.String()) + } + logrus.Debugf("notify: %s\n", notify.GetScript()) + switch { + case notify.GetScript() == "post-dump": + f, err := os.Create(filepath.Join(c.root, "checkpoint")) + if err != nil { + return err + } + f.Close() + case notify.GetScript() == "network-unlock": + if err := unlockNetwork(c.config); err != nil { + return err + } + case notify.GetScript() == "network-lock": + if err := lockNetwork(c.config); err != nil { + return err + } + case notify.GetScript() == "setup-namespaces": + if c.config.Hooks != nil { + s := configs.HookState{ + Version: c.config.Version, + ID: c.id, + Pid: int(notify.GetPid()), + Bundle: utils.SearchLabels(c.config.Labels, "bundle"), + } + for i, hook := range c.config.Hooks.Prestart { + if err := hook.Run(s); err != nil { + return newSystemErrorWithCausef(err, "running prestart hook %d", i) + } + } + } + case notify.GetScript() == "post-restore": + pid := notify.GetPid() + r, err := newRestoredProcess(int(pid), fds) + if err != nil { + return err + } + process.ops = r + if err := c.state.transition(&restoredState{ + imageDir: opts.ImagesDirectory, + c: c, + }); err != nil { + return err + } + // create a timestamp indicating when the restored checkpoint was started + c.created = time.Now().UTC() + if _, err := c.updateState(r); err != nil { + return err + } + if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil { + if !os.IsNotExist(err) { + logrus.Error(err) + } + } + case notify.GetScript() == "orphan-pts-master": + scm, err := unix.ParseSocketControlMessage(oob) + if err != nil { + return err + } + fds, err := unix.ParseUnixRights(&scm[0]) + if err != nil { + return err + } + + master := os.NewFile(uintptr(fds[0]), "orphan-pts-master") + defer master.Close() + + // While we can access console.master, using the API is a good idea. + if err := utils.SendFd(process.ConsoleSocket, master); err != nil { + return err + } + } + return nil +} + +func (c *linuxContainer) updateState(process parentProcess) (*State, error) { + c.initProcess = process + state, err := c.currentState() + if err != nil { + return nil, err + } + err = c.saveState(state) + if err != nil { + return nil, err + } + return state, nil +} + +func (c *linuxContainer) saveState(s *State) error { + f, err := os.Create(filepath.Join(c.root, stateFilename)) + if err != nil { + return err + } + defer f.Close() + return utils.WriteJSON(f, s) +} + +func (c *linuxContainer) deleteState() error { + return os.Remove(filepath.Join(c.root, stateFilename)) +} + +func (c *linuxContainer) currentStatus() (Status, error) { + if err := c.refreshState(); err != nil { + return -1, err + } + return c.state.status(), nil +} + +// refreshState needs to be called to verify that the current state on the +// container is what is true. Because consumers of libcontainer can use it +// out of process we need to verify the container's status based on runtime +// information and not rely on our in process info. +func (c *linuxContainer) refreshState() error { + paused, err := c.isPaused() + if err != nil { + return err + } + if paused { + return c.state.transition(&pausedState{c: c}) + } + t, err := c.runType() + if err != nil { + return err + } + switch t { + case Created: + return c.state.transition(&createdState{c: c}) + case Running: + return c.state.transition(&runningState{c: c}) + } + return c.state.transition(&stoppedState{c: c}) +} + +func (c *linuxContainer) runType() (Status, error) { + if c.initProcess == nil { + return Stopped, nil + } + pid := c.initProcess.pid() + stat, err := system.Stat(pid) + if err != nil { + return Stopped, nil + } + if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead { + return Stopped, nil + } + // We'll create exec fifo and blocking on it after container is created, + // and delete it after start container. + if _, err := os.Stat(filepath.Join(c.root, execFifoFilename)); err == nil { + return Created, nil + } + return Running, nil +} + +func (c *linuxContainer) isPaused() (bool, error) { + fcg := c.cgroupManager.GetPaths()["freezer"] + if fcg == "" { + // A container doesn't have a freezer cgroup + return false, nil + } + data, err := ioutil.ReadFile(filepath.Join(fcg, "freezer.state")) + if err != nil { + // If freezer cgroup is not mounted, the container would just be not paused. + if os.IsNotExist(err) { + return false, nil + } + return false, newSystemErrorWithCause(err, "checking if container is paused") + } + return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil +} + +func (c *linuxContainer) currentState() (*State, error) { + var ( + startTime uint64 + externalDescriptors []string + pid = -1 + ) + if c.initProcess != nil { + pid = c.initProcess.pid() + startTime, _ = c.initProcess.startTime() + externalDescriptors = c.initProcess.externalDescriptors() + } + state := &State{ + BaseState: BaseState{ + ID: c.ID(), + Config: *c.config, + InitProcessPid: pid, + InitProcessStartTime: startTime, + Created: c.created, + }, + Rootless: c.config.Rootless, + CgroupPaths: c.cgroupManager.GetPaths(), + NamespacePaths: make(map[configs.NamespaceType]string), + ExternalDescriptors: externalDescriptors, + } + if pid > 0 { + for _, ns := range c.config.Namespaces { + state.NamespacePaths[ns.Type] = ns.GetPath(pid) + } + for _, nsType := range configs.NamespaceTypes() { + if !configs.IsNamespaceSupported(nsType) { + continue + } + if _, ok := state.NamespacePaths[nsType]; !ok { + ns := configs.Namespace{Type: nsType} + state.NamespacePaths[ns.Type] = ns.GetPath(pid) + } + } + } + return state, nil +} + +// orderNamespacePaths sorts namespace paths into a list of paths that we +// can setns in order. +func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) { + paths := []string{} + + for _, ns := range configs.NamespaceTypes() { + + // Remove namespaces that we don't need to join. + if !c.config.Namespaces.Contains(ns) { + continue + } + + if p, ok := namespaces[ns]; ok && p != "" { + // check if the requested namespace is supported + if !configs.IsNamespaceSupported(ns) { + return nil, newSystemError(fmt.Errorf("namespace %s is not supported", ns)) + } + // only set to join this namespace if it exists + if _, err := os.Lstat(p); err != nil { + return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p) + } + // do not allow namespace path with comma as we use it to separate + // the namespace paths + if strings.ContainsRune(p, ',') { + return nil, newSystemError(fmt.Errorf("invalid path %s", p)) + } + paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(ns), p)) + } + + } + + return paths, nil +} + +func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) { + data := bytes.NewBuffer(nil) + for _, im := range idMap { + line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size) + if _, err := data.WriteString(line); err != nil { + return nil, err + } + } + return data.Bytes(), nil +} + +// bootstrapData encodes the necessary data in netlink binary format +// as a io.Reader. +// Consumer can write the data to a bootstrap program +// such as one that uses nsenter package to bootstrap the container's +// init process correctly, i.e. with correct namespaces, uid/gid +// mapping etc. +func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) { + // create the netlink message + r := nl.NewNetlinkRequest(int(InitMsg), 0) + + // write cloneFlags + r.AddData(&Int32msg{ + Type: CloneFlagsAttr, + Value: uint32(cloneFlags), + }) + + // write custom namespace paths + if len(nsMaps) > 0 { + nsPaths, err := c.orderNamespacePaths(nsMaps) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: NsPathsAttr, + Value: []byte(strings.Join(nsPaths, ",")), + }) + } + + // write namespace paths only when we are not joining an existing user ns + _, joinExistingUser := nsMaps[configs.NEWUSER] + if !joinExistingUser { + // write uid mappings + if len(c.config.UidMappings) > 0 { + b, err := encodeIDMapping(c.config.UidMappings) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: UidmapAttr, + Value: b, + }) + } + + // write gid mappings + if len(c.config.GidMappings) > 0 { + b, err := encodeIDMapping(c.config.GidMappings) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: GidmapAttr, + Value: b, + }) + // The following only applies if we are root. + if !c.config.Rootless { + // check if we have CAP_SETGID to setgroup properly + pid, err := capability.NewPid(os.Getpid()) + if err != nil { + return nil, err + } + if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) { + r.AddData(&Boolmsg{ + Type: SetgroupAttr, + Value: true, + }) + } + } + } + } + + // write oom_score_adj + r.AddData(&Bytemsg{ + Type: OomScoreAdjAttr, + Value: []byte(fmt.Sprintf("%d", c.config.OomScoreAdj)), + }) + + // write rootless + r.AddData(&Boolmsg{ + Type: RootlessAttr, + Value: c.config.Rootless, + }) + + return bytes.NewReader(r.Serialize()), nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go b/vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go new file mode 100644 index 000000000..bb84ff740 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go @@ -0,0 +1,20 @@ +package libcontainer + +// State represents a running container's state +type State struct { + BaseState + + // Platform specific fields below here +} + +// A libcontainer container object. +// +// Each container is thread-safe within the same process. Since a container can +// be destroyed by a separate process, any function may return that the container +// was not found. +type Container interface { + BaseContainer + + // Methods below here are platform specific + +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/container_windows.go new file mode 100644 index 000000000..bb84ff740 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/container_windows.go @@ -0,0 +1,20 @@ +package libcontainer + +// State represents a running container's state +type State struct { + BaseState + + // Platform specific fields below here +} + +// A libcontainer container object. +// +// Each container is thread-safe within the same process. Since a container can +// be destroyed by a separate process, any function may return that the container +// was not found. +type Container interface { + BaseContainer + + // Methods below here are platform specific + +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go new file mode 100644 index 000000000..9423d2464 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go @@ -0,0 +1,37 @@ +package libcontainer + +// cgroup restoring strategy provided by criu +type cgMode uint32 + +const ( + CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu + CRIU_CG_MODE_FULL // always restore all cgroups and their properties + CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system + CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT +) + +type CriuPageServerInfo struct { + Address string // IP address of CRIU page server + Port int32 // port number of CRIU page server +} + +type VethPairName struct { + ContainerInterfaceName string + HostInterfaceName string +} + +type CriuOpts struct { + ImagesDirectory string // directory for storing image files + WorkDirectory string // directory to cd and write logs/pidfiles/stats to + ParentImage string // direcotry for storing parent image files in pre-dump and dump + LeaveRunning bool // leave container in running state after checkpoint + TcpEstablished bool // checkpoint/restore established TCP connections + ExternalUnixConnections bool // allow external unix connections + ShellJob bool // allow to dump and restore shell jobs + FileLocks bool // handle file locks, for safety + PreDump bool // call criu predump to perform iterative checkpoint + PageServer CriuPageServerInfo // allow to dump to criu page server + VethPairs []VethPairName // pass the veth to criu when restore + ManageCgroupsMode cgMode // dump or restore cgroup mode + EmptyNs uint32 // don't c/r properties for namespace from this mask +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go new file mode 100644 index 000000000..bc9207703 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go @@ -0,0 +1,6 @@ +package libcontainer + +// TODO Windows: This can ultimately be entirely factored out as criu is +// a Unix concept not relevant on Windows. +type CriuOpts struct { +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go b/vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go new file mode 100644 index 000000000..21af9db97 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go @@ -0,0 +1,1178 @@ +// Code generated by protoc-gen-go. +// source: criurpc.proto +// DO NOT EDIT! + +/* +Package criurpc is a generated protocol buffer package. + +It is generated from these files: + criurpc.proto + +It has these top-level messages: + CriuPageServerInfo + CriuVethPair + ExtMountMap + JoinNamespace + InheritFd + CgroupRoot + UnixSk + CriuOpts + CriuDumpResp + CriuRestoreResp + CriuNotify + CriuFeatures + CriuReq + CriuResp + CriuVersion +*/ +package criurpc + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type CriuCgMode int32 + +const ( + CriuCgMode_IGNORE CriuCgMode = 0 + CriuCgMode_CG_NONE CriuCgMode = 1 + CriuCgMode_PROPS CriuCgMode = 2 + CriuCgMode_SOFT CriuCgMode = 3 + CriuCgMode_FULL CriuCgMode = 4 + CriuCgMode_STRICT CriuCgMode = 5 + CriuCgMode_DEFAULT CriuCgMode = 6 +) + +var CriuCgMode_name = map[int32]string{ + 0: "IGNORE", + 1: "CG_NONE", + 2: "PROPS", + 3: "SOFT", + 4: "FULL", + 5: "STRICT", + 6: "DEFAULT", +} +var CriuCgMode_value = map[string]int32{ + "IGNORE": 0, + "CG_NONE": 1, + "PROPS": 2, + "SOFT": 3, + "FULL": 4, + "STRICT": 5, + "DEFAULT": 6, +} + +func (x CriuCgMode) Enum() *CriuCgMode { + p := new(CriuCgMode) + *p = x + return p +} +func (x CriuCgMode) String() string { + return proto.EnumName(CriuCgMode_name, int32(x)) +} +func (x *CriuCgMode) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(CriuCgMode_value, data, "CriuCgMode") + if err != nil { + return err + } + *x = CriuCgMode(value) + return nil +} +func (CriuCgMode) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } + +type CriuReqType int32 + +const ( + CriuReqType_EMPTY CriuReqType = 0 + CriuReqType_DUMP CriuReqType = 1 + CriuReqType_RESTORE CriuReqType = 2 + CriuReqType_CHECK CriuReqType = 3 + CriuReqType_PRE_DUMP CriuReqType = 4 + CriuReqType_PAGE_SERVER CriuReqType = 5 + CriuReqType_NOTIFY CriuReqType = 6 + CriuReqType_CPUINFO_DUMP CriuReqType = 7 + CriuReqType_CPUINFO_CHECK CriuReqType = 8 + CriuReqType_FEATURE_CHECK CriuReqType = 9 + CriuReqType_VERSION CriuReqType = 10 +) + +var CriuReqType_name = map[int32]string{ + 0: "EMPTY", + 1: "DUMP", + 2: "RESTORE", + 3: "CHECK", + 4: "PRE_DUMP", + 5: "PAGE_SERVER", + 6: "NOTIFY", + 7: "CPUINFO_DUMP", + 8: "CPUINFO_CHECK", + 9: "FEATURE_CHECK", + 10: "VERSION", +} +var CriuReqType_value = map[string]int32{ + "EMPTY": 0, + "DUMP": 1, + "RESTORE": 2, + "CHECK": 3, + "PRE_DUMP": 4, + "PAGE_SERVER": 5, + "NOTIFY": 6, + "CPUINFO_DUMP": 7, + "CPUINFO_CHECK": 8, + "FEATURE_CHECK": 9, + "VERSION": 10, +} + +func (x CriuReqType) Enum() *CriuReqType { + p := new(CriuReqType) + *p = x + return p +} +func (x CriuReqType) String() string { + return proto.EnumName(CriuReqType_name, int32(x)) +} +func (x *CriuReqType) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(CriuReqType_value, data, "CriuReqType") + if err != nil { + return err + } + *x = CriuReqType(value) + return nil +} +func (CriuReqType) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } + +type CriuPageServerInfo struct { + Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"` + Port *int32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` + Pid *int32 `protobuf:"varint,3,opt,name=pid" json:"pid,omitempty"` + Fd *int32 `protobuf:"varint,4,opt,name=fd" json:"fd,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} } +func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) } +func (*CriuPageServerInfo) ProtoMessage() {} +func (*CriuPageServerInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } + +func (m *CriuPageServerInfo) GetAddress() string { + if m != nil && m.Address != nil { + return *m.Address + } + return "" +} + +func (m *CriuPageServerInfo) GetPort() int32 { + if m != nil && m.Port != nil { + return *m.Port + } + return 0 +} + +func (m *CriuPageServerInfo) GetPid() int32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +func (m *CriuPageServerInfo) GetFd() int32 { + if m != nil && m.Fd != nil { + return *m.Fd + } + return 0 +} + +type CriuVethPair struct { + IfIn *string `protobuf:"bytes,1,req,name=if_in,json=ifIn" json:"if_in,omitempty"` + IfOut *string `protobuf:"bytes,2,req,name=if_out,json=ifOut" json:"if_out,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuVethPair) Reset() { *m = CriuVethPair{} } +func (m *CriuVethPair) String() string { return proto.CompactTextString(m) } +func (*CriuVethPair) ProtoMessage() {} +func (*CriuVethPair) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } + +func (m *CriuVethPair) GetIfIn() string { + if m != nil && m.IfIn != nil { + return *m.IfIn + } + return "" +} + +func (m *CriuVethPair) GetIfOut() string { + if m != nil && m.IfOut != nil { + return *m.IfOut + } + return "" +} + +type ExtMountMap struct { + Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` + Val *string `protobuf:"bytes,2,req,name=val" json:"val,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ExtMountMap) Reset() { *m = ExtMountMap{} } +func (m *ExtMountMap) String() string { return proto.CompactTextString(m) } +func (*ExtMountMap) ProtoMessage() {} +func (*ExtMountMap) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} } + +func (m *ExtMountMap) GetKey() string { + if m != nil && m.Key != nil { + return *m.Key + } + return "" +} + +func (m *ExtMountMap) GetVal() string { + if m != nil && m.Val != nil { + return *m.Val + } + return "" +} + +type JoinNamespace struct { + Ns *string `protobuf:"bytes,1,req,name=ns" json:"ns,omitempty"` + NsFile *string `protobuf:"bytes,2,req,name=ns_file,json=nsFile" json:"ns_file,omitempty"` + ExtraOpt *string `protobuf:"bytes,3,opt,name=extra_opt,json=extraOpt" json:"extra_opt,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *JoinNamespace) Reset() { *m = JoinNamespace{} } +func (m *JoinNamespace) String() string { return proto.CompactTextString(m) } +func (*JoinNamespace) ProtoMessage() {} +func (*JoinNamespace) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} } + +func (m *JoinNamespace) GetNs() string { + if m != nil && m.Ns != nil { + return *m.Ns + } + return "" +} + +func (m *JoinNamespace) GetNsFile() string { + if m != nil && m.NsFile != nil { + return *m.NsFile + } + return "" +} + +func (m *JoinNamespace) GetExtraOpt() string { + if m != nil && m.ExtraOpt != nil { + return *m.ExtraOpt + } + return "" +} + +type InheritFd struct { + Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` + Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *InheritFd) Reset() { *m = InheritFd{} } +func (m *InheritFd) String() string { return proto.CompactTextString(m) } +func (*InheritFd) ProtoMessage() {} +func (*InheritFd) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} } + +func (m *InheritFd) GetKey() string { + if m != nil && m.Key != nil { + return *m.Key + } + return "" +} + +func (m *InheritFd) GetFd() int32 { + if m != nil && m.Fd != nil { + return *m.Fd + } + return 0 +} + +type CgroupRoot struct { + Ctrl *string `protobuf:"bytes,1,opt,name=ctrl" json:"ctrl,omitempty"` + Path *string `protobuf:"bytes,2,req,name=path" json:"path,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CgroupRoot) Reset() { *m = CgroupRoot{} } +func (m *CgroupRoot) String() string { return proto.CompactTextString(m) } +func (*CgroupRoot) ProtoMessage() {} +func (*CgroupRoot) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } + +func (m *CgroupRoot) GetCtrl() string { + if m != nil && m.Ctrl != nil { + return *m.Ctrl + } + return "" +} + +func (m *CgroupRoot) GetPath() string { + if m != nil && m.Path != nil { + return *m.Path + } + return "" +} + +type UnixSk struct { + Inode *uint32 `protobuf:"varint,1,req,name=inode" json:"inode,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *UnixSk) Reset() { *m = UnixSk{} } +func (m *UnixSk) String() string { return proto.CompactTextString(m) } +func (*UnixSk) ProtoMessage() {} +func (*UnixSk) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } + +func (m *UnixSk) GetInode() uint32 { + if m != nil && m.Inode != nil { + return *m.Inode + } + return 0 +} + +type CriuOpts struct { + ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd,json=imagesDirFd" json:"images_dir_fd,omitempty"` + Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` + LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running,json=leaveRunning" json:"leave_running,omitempty"` + ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk,json=extUnixSk" json:"ext_unix_sk,omitempty"` + TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established,json=tcpEstablished" json:"tcp_established,omitempty"` + EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices,json=evasiveDevices" json:"evasive_devices,omitempty"` + ShellJob *bool `protobuf:"varint,7,opt,name=shell_job,json=shellJob" json:"shell_job,omitempty"` + FileLocks *bool `protobuf:"varint,8,opt,name=file_locks,json=fileLocks" json:"file_locks,omitempty"` + LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,json=logLevel,def=2" json:"log_level,omitempty"` + LogFile *string `protobuf:"bytes,10,opt,name=log_file,json=logFile" json:"log_file,omitempty"` + Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"` + NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts,json=notifyScripts" json:"notify_scripts,omitempty"` + Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"` + ParentImg *string `protobuf:"bytes,14,opt,name=parent_img,json=parentImg" json:"parent_img,omitempty"` + TrackMem *bool `protobuf:"varint,15,opt,name=track_mem,json=trackMem" json:"track_mem,omitempty"` + AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup,json=autoDedup" json:"auto_dedup,omitempty"` + WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd,json=workDirFd" json:"work_dir_fd,omitempty"` + LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap,json=linkRemap" json:"link_remap,omitempty"` + Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"` + CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,json=cpuCap,def=4294967295" json:"cpu_cap,omitempty"` + ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap,json=forceIrmap" json:"force_irmap,omitempty"` + ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd,json=execCmd" json:"exec_cmd,omitempty"` + ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt,json=extMnt" json:"ext_mnt,omitempty"` + ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups,json=manageCgroups" json:"manage_cgroups,omitempty"` + CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root,json=cgRoot" json:"cg_root,omitempty"` + RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling,json=rstSibling" json:"rst_sibling,omitempty"` + InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd,json=inheritFd" json:"inherit_fd,omitempty"` + AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt,json=autoExtMnt" json:"auto_ext_mnt,omitempty"` + ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing,json=extSharing" json:"ext_sharing,omitempty"` + ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters,json=extMasters" json:"ext_masters,omitempty"` + SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt,json=skipMnt" json:"skip_mnt,omitempty"` + EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs,json=enableFs" json:"enable_fs,omitempty"` + UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino,json=unixSkIno" json:"unix_sk_ino,omitempty"` + ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,json=manageCgroupsMode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"` + GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,json=ghostLimit,def=1048576" json:"ghost_limit,omitempty"` + IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths,json=irmapScanPaths" json:"irmap_scan_paths,omitempty"` + External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"` + EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns,json=emptyNs" json:"empty_ns,omitempty"` + JoinNs []*JoinNamespace `protobuf:"bytes,39,rep,name=join_ns,json=joinNs" json:"join_ns,omitempty"` + CgroupProps *string `protobuf:"bytes,41,opt,name=cgroup_props,json=cgroupProps" json:"cgroup_props,omitempty"` + CgroupPropsFile *string `protobuf:"bytes,42,opt,name=cgroup_props_file,json=cgroupPropsFile" json:"cgroup_props_file,omitempty"` + CgroupDumpController []string `protobuf:"bytes,43,rep,name=cgroup_dump_controller,json=cgroupDumpController" json:"cgroup_dump_controller,omitempty"` + FreezeCgroup *string `protobuf:"bytes,44,opt,name=freeze_cgroup,json=freezeCgroup" json:"freeze_cgroup,omitempty"` + Timeout *uint32 `protobuf:"varint,45,opt,name=timeout" json:"timeout,omitempty"` + TcpSkipInFlight *bool `protobuf:"varint,46,opt,name=tcp_skip_in_flight,json=tcpSkipInFlight" json:"tcp_skip_in_flight,omitempty"` + WeakSysctls *bool `protobuf:"varint,47,opt,name=weak_sysctls,json=weakSysctls" json:"weak_sysctls,omitempty"` + LazyPages *bool `protobuf:"varint,48,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"` + StatusFd *int32 `protobuf:"varint,49,opt,name=status_fd,json=statusFd" json:"status_fd,omitempty"` + OrphanPtsMaster *bool `protobuf:"varint,50,opt,name=orphan_pts_master,json=orphanPtsMaster" json:"orphan_pts_master,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuOpts) Reset() { *m = CriuOpts{} } +func (m *CriuOpts) String() string { return proto.CompactTextString(m) } +func (*CriuOpts) ProtoMessage() {} +func (*CriuOpts) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} } + +const Default_CriuOpts_LogLevel int32 = 2 +const Default_CriuOpts_CpuCap uint32 = 4294967295 +const Default_CriuOpts_GhostLimit uint32 = 1048576 + +func (m *CriuOpts) GetImagesDirFd() int32 { + if m != nil && m.ImagesDirFd != nil { + return *m.ImagesDirFd + } + return 0 +} + +func (m *CriuOpts) GetPid() int32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +func (m *CriuOpts) GetLeaveRunning() bool { + if m != nil && m.LeaveRunning != nil { + return *m.LeaveRunning + } + return false +} + +func (m *CriuOpts) GetExtUnixSk() bool { + if m != nil && m.ExtUnixSk != nil { + return *m.ExtUnixSk + } + return false +} + +func (m *CriuOpts) GetTcpEstablished() bool { + if m != nil && m.TcpEstablished != nil { + return *m.TcpEstablished + } + return false +} + +func (m *CriuOpts) GetEvasiveDevices() bool { + if m != nil && m.EvasiveDevices != nil { + return *m.EvasiveDevices + } + return false +} + +func (m *CriuOpts) GetShellJob() bool { + if m != nil && m.ShellJob != nil { + return *m.ShellJob + } + return false +} + +func (m *CriuOpts) GetFileLocks() bool { + if m != nil && m.FileLocks != nil { + return *m.FileLocks + } + return false +} + +func (m *CriuOpts) GetLogLevel() int32 { + if m != nil && m.LogLevel != nil { + return *m.LogLevel + } + return Default_CriuOpts_LogLevel +} + +func (m *CriuOpts) GetLogFile() string { + if m != nil && m.LogFile != nil { + return *m.LogFile + } + return "" +} + +func (m *CriuOpts) GetPs() *CriuPageServerInfo { + if m != nil { + return m.Ps + } + return nil +} + +func (m *CriuOpts) GetNotifyScripts() bool { + if m != nil && m.NotifyScripts != nil { + return *m.NotifyScripts + } + return false +} + +func (m *CriuOpts) GetRoot() string { + if m != nil && m.Root != nil { + return *m.Root + } + return "" +} + +func (m *CriuOpts) GetParentImg() string { + if m != nil && m.ParentImg != nil { + return *m.ParentImg + } + return "" +} + +func (m *CriuOpts) GetTrackMem() bool { + if m != nil && m.TrackMem != nil { + return *m.TrackMem + } + return false +} + +func (m *CriuOpts) GetAutoDedup() bool { + if m != nil && m.AutoDedup != nil { + return *m.AutoDedup + } + return false +} + +func (m *CriuOpts) GetWorkDirFd() int32 { + if m != nil && m.WorkDirFd != nil { + return *m.WorkDirFd + } + return 0 +} + +func (m *CriuOpts) GetLinkRemap() bool { + if m != nil && m.LinkRemap != nil { + return *m.LinkRemap + } + return false +} + +func (m *CriuOpts) GetVeths() []*CriuVethPair { + if m != nil { + return m.Veths + } + return nil +} + +func (m *CriuOpts) GetCpuCap() uint32 { + if m != nil && m.CpuCap != nil { + return *m.CpuCap + } + return Default_CriuOpts_CpuCap +} + +func (m *CriuOpts) GetForceIrmap() bool { + if m != nil && m.ForceIrmap != nil { + return *m.ForceIrmap + } + return false +} + +func (m *CriuOpts) GetExecCmd() []string { + if m != nil { + return m.ExecCmd + } + return nil +} + +func (m *CriuOpts) GetExtMnt() []*ExtMountMap { + if m != nil { + return m.ExtMnt + } + return nil +} + +func (m *CriuOpts) GetManageCgroups() bool { + if m != nil && m.ManageCgroups != nil { + return *m.ManageCgroups + } + return false +} + +func (m *CriuOpts) GetCgRoot() []*CgroupRoot { + if m != nil { + return m.CgRoot + } + return nil +} + +func (m *CriuOpts) GetRstSibling() bool { + if m != nil && m.RstSibling != nil { + return *m.RstSibling + } + return false +} + +func (m *CriuOpts) GetInheritFd() []*InheritFd { + if m != nil { + return m.InheritFd + } + return nil +} + +func (m *CriuOpts) GetAutoExtMnt() bool { + if m != nil && m.AutoExtMnt != nil { + return *m.AutoExtMnt + } + return false +} + +func (m *CriuOpts) GetExtSharing() bool { + if m != nil && m.ExtSharing != nil { + return *m.ExtSharing + } + return false +} + +func (m *CriuOpts) GetExtMasters() bool { + if m != nil && m.ExtMasters != nil { + return *m.ExtMasters + } + return false +} + +func (m *CriuOpts) GetSkipMnt() []string { + if m != nil { + return m.SkipMnt + } + return nil +} + +func (m *CriuOpts) GetEnableFs() []string { + if m != nil { + return m.EnableFs + } + return nil +} + +func (m *CriuOpts) GetUnixSkIno() []*UnixSk { + if m != nil { + return m.UnixSkIno + } + return nil +} + +func (m *CriuOpts) GetManageCgroupsMode() CriuCgMode { + if m != nil && m.ManageCgroupsMode != nil { + return *m.ManageCgroupsMode + } + return CriuCgMode_IGNORE +} + +func (m *CriuOpts) GetGhostLimit() uint32 { + if m != nil && m.GhostLimit != nil { + return *m.GhostLimit + } + return Default_CriuOpts_GhostLimit +} + +func (m *CriuOpts) GetIrmapScanPaths() []string { + if m != nil { + return m.IrmapScanPaths + } + return nil +} + +func (m *CriuOpts) GetExternal() []string { + if m != nil { + return m.External + } + return nil +} + +func (m *CriuOpts) GetEmptyNs() uint32 { + if m != nil && m.EmptyNs != nil { + return *m.EmptyNs + } + return 0 +} + +func (m *CriuOpts) GetJoinNs() []*JoinNamespace { + if m != nil { + return m.JoinNs + } + return nil +} + +func (m *CriuOpts) GetCgroupProps() string { + if m != nil && m.CgroupProps != nil { + return *m.CgroupProps + } + return "" +} + +func (m *CriuOpts) GetCgroupPropsFile() string { + if m != nil && m.CgroupPropsFile != nil { + return *m.CgroupPropsFile + } + return "" +} + +func (m *CriuOpts) GetCgroupDumpController() []string { + if m != nil { + return m.CgroupDumpController + } + return nil +} + +func (m *CriuOpts) GetFreezeCgroup() string { + if m != nil && m.FreezeCgroup != nil { + return *m.FreezeCgroup + } + return "" +} + +func (m *CriuOpts) GetTimeout() uint32 { + if m != nil && m.Timeout != nil { + return *m.Timeout + } + return 0 +} + +func (m *CriuOpts) GetTcpSkipInFlight() bool { + if m != nil && m.TcpSkipInFlight != nil { + return *m.TcpSkipInFlight + } + return false +} + +func (m *CriuOpts) GetWeakSysctls() bool { + if m != nil && m.WeakSysctls != nil { + return *m.WeakSysctls + } + return false +} + +func (m *CriuOpts) GetLazyPages() bool { + if m != nil && m.LazyPages != nil { + return *m.LazyPages + } + return false +} + +func (m *CriuOpts) GetStatusFd() int32 { + if m != nil && m.StatusFd != nil { + return *m.StatusFd + } + return 0 +} + +func (m *CriuOpts) GetOrphanPtsMaster() bool { + if m != nil && m.OrphanPtsMaster != nil { + return *m.OrphanPtsMaster + } + return false +} + +type CriuDumpResp struct { + Restored *bool `protobuf:"varint,1,opt,name=restored" json:"restored,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} } +func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) } +func (*CriuDumpResp) ProtoMessage() {} +func (*CriuDumpResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} } + +func (m *CriuDumpResp) GetRestored() bool { + if m != nil && m.Restored != nil { + return *m.Restored + } + return false +} + +type CriuRestoreResp struct { + Pid *int32 `protobuf:"varint,1,req,name=pid" json:"pid,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} } +func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) } +func (*CriuRestoreResp) ProtoMessage() {} +func (*CriuRestoreResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} } + +func (m *CriuRestoreResp) GetPid() int32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +type CriuNotify struct { + Script *string `protobuf:"bytes,1,opt,name=script" json:"script,omitempty"` + Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuNotify) Reset() { *m = CriuNotify{} } +func (m *CriuNotify) String() string { return proto.CompactTextString(m) } +func (*CriuNotify) ProtoMessage() {} +func (*CriuNotify) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} } + +func (m *CriuNotify) GetScript() string { + if m != nil && m.Script != nil { + return *m.Script + } + return "" +} + +func (m *CriuNotify) GetPid() int32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +// +// List of features which can queried via +// CRIU_REQ_TYPE__FEATURE_CHECK +type CriuFeatures struct { + MemTrack *bool `protobuf:"varint,1,opt,name=mem_track,json=memTrack" json:"mem_track,omitempty"` + LazyPages *bool `protobuf:"varint,2,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuFeatures) Reset() { *m = CriuFeatures{} } +func (m *CriuFeatures) String() string { return proto.CompactTextString(m) } +func (*CriuFeatures) ProtoMessage() {} +func (*CriuFeatures) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} } + +func (m *CriuFeatures) GetMemTrack() bool { + if m != nil && m.MemTrack != nil { + return *m.MemTrack + } + return false +} + +func (m *CriuFeatures) GetLazyPages() bool { + if m != nil && m.LazyPages != nil { + return *m.LazyPages + } + return false +} + +type CriuReq struct { + Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"` + Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"` + NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success,json=notifySuccess" json:"notify_success,omitempty"` + // + // When set service won't close the connection but + // will wait for more req-s to appear. Works not + // for all request types. + KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open,json=keepOpen" json:"keep_open,omitempty"` + // + // 'features' can be used to query which features + // are supported by the installed criu/kernel + // via RPC. + Features *CriuFeatures `protobuf:"bytes,5,opt,name=features" json:"features,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuReq) Reset() { *m = CriuReq{} } +func (m *CriuReq) String() string { return proto.CompactTextString(m) } +func (*CriuReq) ProtoMessage() {} +func (*CriuReq) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} } + +func (m *CriuReq) GetType() CriuReqType { + if m != nil && m.Type != nil { + return *m.Type + } + return CriuReqType_EMPTY +} + +func (m *CriuReq) GetOpts() *CriuOpts { + if m != nil { + return m.Opts + } + return nil +} + +func (m *CriuReq) GetNotifySuccess() bool { + if m != nil && m.NotifySuccess != nil { + return *m.NotifySuccess + } + return false +} + +func (m *CriuReq) GetKeepOpen() bool { + if m != nil && m.KeepOpen != nil { + return *m.KeepOpen + } + return false +} + +func (m *CriuReq) GetFeatures() *CriuFeatures { + if m != nil { + return m.Features + } + return nil +} + +type CriuResp struct { + Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"` + Success *bool `protobuf:"varint,2,req,name=success" json:"success,omitempty"` + Dump *CriuDumpResp `protobuf:"bytes,3,opt,name=dump" json:"dump,omitempty"` + Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"` + Notify *CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"` + Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"` + CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno,json=crErrno" json:"cr_errno,omitempty"` + Features *CriuFeatures `protobuf:"bytes,8,opt,name=features" json:"features,omitempty"` + CrErrmsg *string `protobuf:"bytes,9,opt,name=cr_errmsg,json=crErrmsg" json:"cr_errmsg,omitempty"` + Version *CriuVersion `protobuf:"bytes,10,opt,name=version" json:"version,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuResp) Reset() { *m = CriuResp{} } +func (m *CriuResp) String() string { return proto.CompactTextString(m) } +func (*CriuResp) ProtoMessage() {} +func (*CriuResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} } + +func (m *CriuResp) GetType() CriuReqType { + if m != nil && m.Type != nil { + return *m.Type + } + return CriuReqType_EMPTY +} + +func (m *CriuResp) GetSuccess() bool { + if m != nil && m.Success != nil { + return *m.Success + } + return false +} + +func (m *CriuResp) GetDump() *CriuDumpResp { + if m != nil { + return m.Dump + } + return nil +} + +func (m *CriuResp) GetRestore() *CriuRestoreResp { + if m != nil { + return m.Restore + } + return nil +} + +func (m *CriuResp) GetNotify() *CriuNotify { + if m != nil { + return m.Notify + } + return nil +} + +func (m *CriuResp) GetPs() *CriuPageServerInfo { + if m != nil { + return m.Ps + } + return nil +} + +func (m *CriuResp) GetCrErrno() int32 { + if m != nil && m.CrErrno != nil { + return *m.CrErrno + } + return 0 +} + +func (m *CriuResp) GetFeatures() *CriuFeatures { + if m != nil { + return m.Features + } + return nil +} + +func (m *CriuResp) GetCrErrmsg() string { + if m != nil && m.CrErrmsg != nil { + return *m.CrErrmsg + } + return "" +} + +func (m *CriuResp) GetVersion() *CriuVersion { + if m != nil { + return m.Version + } + return nil +} + +// Answer for criu_req_type.VERSION requests +type CriuVersion struct { + Major *int32 `protobuf:"varint,1,req,name=major" json:"major,omitempty"` + Minor *int32 `protobuf:"varint,2,req,name=minor" json:"minor,omitempty"` + Gitid *string `protobuf:"bytes,3,opt,name=gitid" json:"gitid,omitempty"` + Sublevel *int32 `protobuf:"varint,4,opt,name=sublevel" json:"sublevel,omitempty"` + Extra *int32 `protobuf:"varint,5,opt,name=extra" json:"extra,omitempty"` + Name *string `protobuf:"bytes,6,opt,name=name" json:"name,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuVersion) Reset() { *m = CriuVersion{} } +func (m *CriuVersion) String() string { return proto.CompactTextString(m) } +func (*CriuVersion) ProtoMessage() {} +func (*CriuVersion) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} } + +func (m *CriuVersion) GetMajor() int32 { + if m != nil && m.Major != nil { + return *m.Major + } + return 0 +} + +func (m *CriuVersion) GetMinor() int32 { + if m != nil && m.Minor != nil { + return *m.Minor + } + return 0 +} + +func (m *CriuVersion) GetGitid() string { + if m != nil && m.Gitid != nil { + return *m.Gitid + } + return "" +} + +func (m *CriuVersion) GetSublevel() int32 { + if m != nil && m.Sublevel != nil { + return *m.Sublevel + } + return 0 +} + +func (m *CriuVersion) GetExtra() int32 { + if m != nil && m.Extra != nil { + return *m.Extra + } + return 0 +} + +func (m *CriuVersion) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func init() { + proto.RegisterType((*CriuPageServerInfo)(nil), "criu_page_server_info") + proto.RegisterType((*CriuVethPair)(nil), "criu_veth_pair") + proto.RegisterType((*ExtMountMap)(nil), "ext_mount_map") + proto.RegisterType((*JoinNamespace)(nil), "join_namespace") + proto.RegisterType((*InheritFd)(nil), "inherit_fd") + proto.RegisterType((*CgroupRoot)(nil), "cgroup_root") + proto.RegisterType((*UnixSk)(nil), "unix_sk") + proto.RegisterType((*CriuOpts)(nil), "criu_opts") + proto.RegisterType((*CriuDumpResp)(nil), "criu_dump_resp") + proto.RegisterType((*CriuRestoreResp)(nil), "criu_restore_resp") + proto.RegisterType((*CriuNotify)(nil), "criu_notify") + proto.RegisterType((*CriuFeatures)(nil), "criu_features") + proto.RegisterType((*CriuReq)(nil), "criu_req") + proto.RegisterType((*CriuResp)(nil), "criu_resp") + proto.RegisterType((*CriuVersion)(nil), "criu_version") + proto.RegisterEnum("CriuCgMode", CriuCgMode_name, CriuCgMode_value) + proto.RegisterEnum("CriuReqType", CriuReqType_name, CriuReqType_value) +} + +func init() { proto.RegisterFile("criurpc.proto", fileDescriptor0) } + +var fileDescriptor0 = []byte{ + // 1781 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x56, 0xdd, 0x72, 0x5b, 0xb7, + 0x11, 0x0e, 0x29, 0xfe, 0x1c, 0x82, 0x3f, 0xa6, 0x10, 0xdb, 0x81, 0x93, 0xda, 0x62, 0xe8, 0x28, + 0x51, 0x15, 0x97, 0x4d, 0x58, 0x3b, 0xae, 0x33, 0xed, 0x85, 0x47, 0x22, 0x5d, 0x36, 0x92, 0xc8, + 0x01, 0x25, 0xcf, 0xe4, 0x0a, 0x73, 0x74, 0x0e, 0x48, 0xc1, 0x3c, 0x7f, 0x05, 0x40, 0x45, 0xf2, + 0x83, 0xf4, 0x29, 0xfa, 0x0c, 0x7d, 0x84, 0xbe, 0x4e, 0x6f, 0x3b, 0xbb, 0x00, 0x65, 0x29, 0xc9, + 0xb4, 0xbd, 0xc3, 0x7e, 0x58, 0x00, 0xbb, 0xfb, 0xed, 0x0f, 0x48, 0x3b, 0xd2, 0x6a, 0xad, 0x8b, + 0x68, 0x50, 0xe8, 0xdc, 0xe6, 0xfd, 0x25, 0x79, 0x00, 0x80, 0x28, 0xc2, 0xa5, 0x14, 0x46, 0xea, + 0x4b, 0xa9, 0x85, 0xca, 0x16, 0x39, 0x65, 0xa4, 0x1e, 0xc6, 0xb1, 0x96, 0xc6, 0xb0, 0x52, 0xaf, + 0xb4, 0xd7, 0xe0, 0x1b, 0x91, 0x52, 0x52, 0x29, 0x72, 0x6d, 0x59, 0xb9, 0x57, 0xda, 0xab, 0x72, + 0x5c, 0xd3, 0x2e, 0xd9, 0x2a, 0x54, 0xcc, 0xb6, 0x10, 0x82, 0x25, 0xed, 0x90, 0xf2, 0x22, 0x66, + 0x15, 0x04, 0xca, 0x8b, 0xb8, 0xff, 0x27, 0xd2, 0xc1, 0x87, 0x2e, 0xa5, 0xbd, 0x10, 0x45, 0xa8, + 0x34, 0xfd, 0x98, 0x54, 0xd5, 0x42, 0xa8, 0x8c, 0x95, 0x7a, 0xe5, 0xbd, 0x06, 0xaf, 0xa8, 0xc5, + 0x24, 0xa3, 0x0f, 0x48, 0x4d, 0x2d, 0x44, 0xbe, 0x86, 0xeb, 0x01, 0xad, 0xaa, 0xc5, 0x74, 0x6d, + 0xfb, 0x7f, 0x20, 0x6d, 0x79, 0x65, 0x45, 0x9a, 0xaf, 0x33, 0x2b, 0xd2, 0xb0, 0x80, 0x07, 0x57, + 0xf2, 0xda, 0x1f, 0x85, 0x25, 0x20, 0x97, 0x61, 0xe2, 0x8f, 0xc1, 0xb2, 0xff, 0x96, 0x74, 0xde, + 0xe5, 0x2a, 0x13, 0x59, 0x98, 0x4a, 0x53, 0x84, 0x91, 0x04, 0xa3, 0x32, 0xe3, 0x0f, 0x95, 0x33, + 0x43, 0x3f, 0x21, 0xf5, 0xcc, 0x88, 0x85, 0x4a, 0xa4, 0x3f, 0x57, 0xcb, 0xcc, 0x58, 0x25, 0x92, + 0x7e, 0x46, 0x1a, 0xf2, 0xca, 0xea, 0x50, 0xe4, 0x85, 0x45, 0xaf, 0x1a, 0x3c, 0x40, 0x60, 0x5a, + 0xd8, 0xfe, 0x80, 0x10, 0x95, 0x5d, 0x48, 0xad, 0xac, 0x58, 0xc4, 0xbf, 0x62, 0x89, 0x73, 0x1d, + 0x2e, 0x74, 0xae, 0xbf, 0x20, 0xcd, 0x68, 0xa9, 0xf3, 0x75, 0x21, 0x74, 0x9e, 0x5b, 0x88, 0x5f, + 0x64, 0x75, 0xe2, 0xc3, 0x8a, 0x6b, 0x8c, 0x69, 0x68, 0x2f, 0xbc, 0x15, 0xb8, 0xee, 0xef, 0x90, + 0xfa, 0x3a, 0x53, 0x57, 0xc2, 0xac, 0xe8, 0x7d, 0x52, 0x55, 0x59, 0x1e, 0x4b, 0x7c, 0xa5, 0xcd, + 0x9d, 0xd0, 0xff, 0x57, 0x9b, 0x34, 0x30, 0xa6, 0x79, 0x61, 0x0d, 0xed, 0x93, 0xb6, 0x4a, 0xc3, + 0xa5, 0x34, 0x22, 0x56, 0x5a, 0x2c, 0x62, 0xd4, 0xad, 0xf2, 0xa6, 0x03, 0x0f, 0x95, 0x1e, 0xc7, + 0x1b, 0x9a, 0xca, 0x1f, 0x68, 0x7a, 0x4a, 0xda, 0x89, 0x0c, 0x2f, 0xa5, 0xd0, 0xeb, 0x2c, 0x53, + 0xd9, 0x12, 0x9d, 0x0d, 0x78, 0x0b, 0x41, 0xee, 0x30, 0xfa, 0x84, 0x34, 0x21, 0xfa, 0xde, 0x1a, + 0x24, 0x35, 0xe0, 0x10, 0xa0, 0xb3, 0x4c, 0x5d, 0xcd, 0x57, 0xf4, 0x2b, 0x72, 0xcf, 0x46, 0x85, + 0x90, 0xc6, 0x86, 0xe7, 0x89, 0x32, 0x17, 0x32, 0x66, 0x55, 0xd4, 0xe9, 0xd8, 0xa8, 0x18, 0x7d, + 0x40, 0x41, 0x51, 0x5e, 0x86, 0x46, 0x5d, 0x4a, 0x11, 0xcb, 0x4b, 0x15, 0x49, 0xc3, 0x6a, 0x4e, + 0xd1, 0xc3, 0x87, 0x0e, 0x85, 0xf8, 0x9b, 0x0b, 0x99, 0x24, 0xe2, 0x5d, 0x7e, 0xce, 0xea, 0xa8, + 0x12, 0x20, 0xf0, 0xd7, 0xfc, 0x9c, 0x3e, 0x26, 0x04, 0x28, 0x13, 0x49, 0x1e, 0xad, 0x0c, 0x0b, + 0x9c, 0x35, 0x80, 0x1c, 0x01, 0x40, 0x9f, 0x90, 0x46, 0x92, 0x2f, 0x45, 0x22, 0x2f, 0x65, 0xc2, + 0x1a, 0xe0, 0xea, 0xf7, 0xa5, 0x21, 0x0f, 0x92, 0x7c, 0x79, 0x04, 0x10, 0x7d, 0x44, 0x60, 0xed, + 0x58, 0x27, 0x2e, 0xb5, 0x93, 0x7c, 0x89, 0xb4, 0x7f, 0x49, 0xca, 0x85, 0x61, 0xcd, 0x5e, 0x69, + 0xaf, 0x39, 0x7c, 0x38, 0xf8, 0xd5, 0xc2, 0xe0, 0xe5, 0xc2, 0xd0, 0x5d, 0xd2, 0xc9, 0x72, 0xab, + 0x16, 0xd7, 0xc2, 0x44, 0x5a, 0x15, 0xd6, 0xb0, 0x16, 0x5a, 0xd1, 0x76, 0xe8, 0xdc, 0x81, 0xc0, + 0x2a, 0x30, 0xce, 0xda, 0x8e, 0x69, 0x64, 0xff, 0x31, 0x21, 0x45, 0xa8, 0x65, 0x66, 0x85, 0x4a, + 0x97, 0xac, 0x83, 0x3b, 0x0d, 0x87, 0x4c, 0xd2, 0x25, 0x38, 0x6e, 0x75, 0x18, 0xad, 0x44, 0x2a, + 0x53, 0x76, 0xcf, 0x39, 0x8e, 0xc0, 0xb1, 0x4c, 0xe1, 0x6c, 0xb8, 0xb6, 0xb9, 0x88, 0x65, 0xbc, + 0x2e, 0x58, 0xd7, 0x39, 0x0e, 0xc8, 0x21, 0x00, 0x40, 0xd3, 0x4f, 0xb9, 0x5e, 0x6d, 0xf8, 0xdf, + 0x46, 0x96, 0x1b, 0x00, 0x39, 0xf6, 0x1f, 0x13, 0x92, 0xa8, 0x6c, 0x25, 0xb4, 0x4c, 0xc3, 0x82, + 0x51, 0x77, 0x1c, 0x10, 0x0e, 0x00, 0xdd, 0x25, 0x55, 0x28, 0x4e, 0xc3, 0x3e, 0xee, 0x6d, 0xed, + 0x35, 0x87, 0xf7, 0x06, 0x77, 0xeb, 0x95, 0xbb, 0x5d, 0xfa, 0x94, 0xd4, 0xa3, 0x62, 0x2d, 0xa2, + 0xb0, 0x60, 0xf7, 0x7b, 0xa5, 0xbd, 0xf6, 0xf7, 0xe4, 0xf9, 0xf0, 0xd5, 0xf3, 0x57, 0xdf, 0xbd, + 0x1c, 0xbe, 0x7a, 0xc1, 0x6b, 0x51, 0xb1, 0x3e, 0x08, 0x0b, 0xba, 0x43, 0x9a, 0x8b, 0x5c, 0x47, + 0x52, 0x28, 0x0d, 0x6f, 0x3d, 0xc0, 0xb7, 0x08, 0x42, 0x13, 0x40, 0x80, 0x04, 0x79, 0x25, 0x23, + 0x11, 0xa5, 0x31, 0x7b, 0xd8, 0xdb, 0x02, 0x12, 0x40, 0x3e, 0x48, 0x21, 0x49, 0xea, 0x58, 0xeb, + 0x99, 0x65, 0x9f, 0xa0, 0x25, 0x9d, 0xc1, 0x9d, 0xda, 0xe7, 0x35, 0x79, 0x65, 0x8f, 0x33, 0x0b, + 0x2c, 0xa4, 0x61, 0x06, 0xfc, 0xb8, 0xf2, 0x32, 0x8c, 0x39, 0x16, 0x1c, 0x7a, 0xe0, 0x40, 0xba, + 0x4b, 0xea, 0xd1, 0x12, 0x4b, 0x8f, 0x3d, 0xc2, 0xfb, 0x5a, 0x83, 0x5b, 0xe5, 0xc8, 0x6b, 0xd1, + 0x92, 0x03, 0x31, 0x3b, 0xa4, 0xa9, 0x8d, 0x15, 0x46, 0x9d, 0x27, 0x50, 0x07, 0x9f, 0x3a, 0x93, + 0xb5, 0xb1, 0x73, 0x87, 0xd0, 0xfd, 0xdb, 0x65, 0xcf, 0x3e, 0xc3, 0xab, 0x9a, 0x83, 0x0f, 0x10, + 0x6f, 0xf8, 0xf5, 0x38, 0xa6, 0x3d, 0xd2, 0x42, 0xa6, 0x36, 0x8e, 0xfc, 0xc6, 0xdd, 0x06, 0xd8, + 0xc8, 0x19, 0xbf, 0xe3, 0x6a, 0xca, 0x5c, 0x84, 0x1a, 0x9e, 0x7b, 0xec, 0x14, 0xe4, 0x95, 0x9d, + 0x3b, 0x64, 0xa3, 0x90, 0x86, 0xc6, 0x4a, 0x6d, 0xd8, 0x93, 0x1b, 0x85, 0x63, 0x87, 0x40, 0x08, + 0xcd, 0x4a, 0x15, 0x78, 0xff, 0x8e, 0x0b, 0x21, 0xc8, 0x70, 0x39, 0xb4, 0xaf, 0x2c, 0x3c, 0x4f, + 0xa4, 0x58, 0x18, 0xd6, 0xc3, 0xbd, 0xc0, 0x01, 0x63, 0x43, 0xf7, 0x48, 0xd3, 0x57, 0xb2, 0x50, + 0x59, 0xce, 0x3e, 0x47, 0x47, 0x82, 0x81, 0xc7, 0x78, 0x63, 0x8d, 0x45, 0x3d, 0xc9, 0x72, 0xfa, + 0x67, 0xf2, 0xf1, 0xdd, 0x00, 0x8b, 0x14, 0x9a, 0x50, 0xbf, 0x57, 0xda, 0xeb, 0x0c, 0xdb, 0x2e, + 0x3f, 0xa2, 0x25, 0x82, 0x7c, 0xfb, 0x4e, 0xd0, 0x8f, 0xf3, 0x58, 0xc2, 0x43, 0xcb, 0x8b, 0xdc, + 0x58, 0x91, 0xa8, 0x54, 0x59, 0xf6, 0x14, 0xb3, 0xa5, 0xfe, 0xed, 0x37, 0xcf, 0xff, 0xf8, 0xe2, + 0xe5, 0x77, 0x9c, 0xe0, 0xde, 0x11, 0x6c, 0xd1, 0x3d, 0xd2, 0xc5, 0x44, 0x11, 0x26, 0x0a, 0x33, + 0x01, 0xdd, 0xcf, 0xb0, 0x2f, 0xd0, 0xec, 0x0e, 0xe2, 0xf3, 0x28, 0xcc, 0x66, 0x80, 0xd2, 0x4f, + 0x21, 0x6f, 0xac, 0xd4, 0x59, 0x98, 0xb0, 0x5d, 0xef, 0x98, 0x97, 0x31, 0xa7, 0xd2, 0xc2, 0x5e, + 0x8b, 0xcc, 0xb0, 0x2f, 0xe1, 0x31, 0x5e, 0x47, 0xf9, 0x04, 0x7c, 0xae, 0xbb, 0x51, 0x60, 0xd8, + 0x57, 0x3e, 0xbb, 0xef, 0x8e, 0x06, 0x5e, 0x03, 0xf9, 0xc4, 0xd0, 0xcf, 0x49, 0xcb, 0x67, 0x47, + 0xa1, 0xf3, 0xc2, 0xb0, 0xdf, 0x62, 0x85, 0xfa, 0x06, 0x3e, 0x03, 0x88, 0xee, 0x93, 0xed, 0xdb, + 0x2a, 0xae, 0x93, 0xec, 0xa3, 0xde, 0xbd, 0x5b, 0x7a, 0xd8, 0x51, 0x9e, 0x93, 0x87, 0x5e, 0x37, + 0x5e, 0xa7, 0x85, 0x88, 0xf2, 0xcc, 0xea, 0x3c, 0x49, 0xa4, 0x66, 0x5f, 0xa3, 0xf5, 0xf7, 0xdd, + 0xee, 0xe1, 0x3a, 0x2d, 0x0e, 0x6e, 0xf6, 0xa0, 0x2b, 0x2f, 0xb4, 0x94, 0xef, 0x37, 0x81, 0x67, + 0xcf, 0xf0, 0xf6, 0x96, 0x03, 0x5d, 0x8c, 0x61, 0x42, 0x5b, 0x95, 0x4a, 0x98, 0x95, 0xbf, 0x73, + 0xde, 0x7a, 0x91, 0x7e, 0x4d, 0x28, 0xf4, 0x63, 0xcc, 0x0e, 0x95, 0x89, 0x45, 0xa2, 0x96, 0x17, + 0x96, 0x0d, 0x30, 0x83, 0xa0, 0x53, 0xcf, 0x57, 0xaa, 0x98, 0x64, 0x63, 0x84, 0xc1, 0xe1, 0x9f, + 0x64, 0xb8, 0x12, 0xe6, 0xda, 0x44, 0x36, 0x31, 0xec, 0xf7, 0xa8, 0xd6, 0x04, 0x6c, 0xee, 0x20, + 0x6c, 0x1c, 0xe1, 0xfb, 0x6b, 0xec, 0x85, 0x86, 0x7d, 0xe3, 0x1b, 0x47, 0xf8, 0xfe, 0x7a, 0x06, + 0x00, 0x36, 0x6b, 0x1b, 0xda, 0xb5, 0x81, 0xba, 0xf8, 0x16, 0xbb, 0x4e, 0xe0, 0x80, 0x71, 0x0c, + 0xc1, 0xca, 0x75, 0x71, 0x01, 0xb4, 0x5a, 0xe3, 0xb3, 0x99, 0x0d, 0x9d, 0x29, 0x6e, 0x63, 0x66, + 0x8d, 0x4b, 0xe9, 0xfe, 0x33, 0xff, 0x47, 0xc0, 0x50, 0x69, 0x69, 0x0a, 0xa0, 0x5b, 0x4b, 0x63, + 0x73, 0x2d, 0x63, 0x9c, 0x97, 0x01, 0xbf, 0x91, 0xfb, 0xbb, 0x64, 0x1b, 0xb5, 0x3d, 0xe0, 0x0e, + 0xf8, 0x09, 0xe7, 0x66, 0x1f, 0x2c, 0xfb, 0x2f, 0x49, 0x13, 0xd5, 0x5c, 0x6b, 0xa6, 0x0f, 0x49, + 0xcd, 0xf5, 0x6c, 0x3f, 0x7f, 0xbd, 0xf4, 0xcb, 0xd1, 0xd8, 0xff, 0xc1, 0xfd, 0x95, 0xc4, 0x42, + 0x86, 0x76, 0xad, 0x9d, 0x9f, 0xa9, 0x4c, 0x05, 0xb6, 0xe3, 0x8d, 0x35, 0xa9, 0x4c, 0x4f, 0x41, + 0xfe, 0x59, 0x8c, 0xca, 0x3f, 0x8b, 0x51, 0xff, 0x9f, 0x25, 0x12, 0x78, 0x6b, 0xff, 0x46, 0xfb, + 0xa4, 0x62, 0xaf, 0x0b, 0x37, 0xcd, 0x3b, 0xc3, 0xce, 0x60, 0xb3, 0x21, 0x00, 0xe5, 0xb8, 0x47, + 0x9f, 0x90, 0x0a, 0x8c, 0x75, 0xbc, 0xa9, 0x39, 0x24, 0x83, 0x9b, 0x41, 0xcf, 0x11, 0xbf, 0x3d, + 0x82, 0xd6, 0x51, 0x04, 0xdf, 0xb4, 0xad, 0x3b, 0x23, 0xc8, 0x81, 0x60, 0xf3, 0x4a, 0xca, 0x42, + 0xe4, 0x85, 0xcc, 0xfc, 0xe0, 0x0e, 0x00, 0x98, 0x16, 0x32, 0xa3, 0xfb, 0x24, 0xd8, 0x38, 0x87, + 0x03, 0xbb, 0xb9, 0xb1, 0x65, 0x83, 0xf2, 0x9b, 0xfd, 0xfe, 0xbf, 0xcb, 0xfe, 0xb3, 0x81, 0x61, + 0xfe, 0x7f, 0x3c, 0x60, 0xa4, 0xbe, 0x31, 0x0d, 0xbe, 0x35, 0x01, 0xdf, 0x88, 0xf4, 0x29, 0xa9, + 0x00, 0xc5, 0x68, 0xf1, 0xcd, 0xa0, 0xb9, 0x21, 0x9d, 0xe3, 0x26, 0x7d, 0x46, 0xea, 0x9e, 0x59, + 0xb4, 0xbb, 0x39, 0xa4, 0x83, 0x5f, 0xd0, 0xcd, 0x37, 0x2a, 0xf4, 0x0b, 0x52, 0x73, 0x8e, 0x7b, + 0x47, 0x5a, 0x83, 0x5b, 0xa4, 0x73, 0xbf, 0xe7, 0xe7, 0x7b, 0xed, 0x7f, 0xce, 0xf7, 0x47, 0x40, + 0x96, 0x90, 0x5a, 0x67, 0x39, 0xfe, 0x3e, 0xaa, 0xbc, 0x1e, 0xe9, 0x11, 0x88, 0x77, 0x62, 0x16, + 0xfc, 0xf7, 0x98, 0x41, 0xf0, 0xdd, 0x35, 0xa9, 0x59, 0xe2, 0x4f, 0xa4, 0xc1, 0x03, 0xbc, 0x27, + 0x35, 0x4b, 0x18, 0x73, 0x97, 0x52, 0x1b, 0x95, 0x67, 0xf8, 0x0b, 0x69, 0x6e, 0x1a, 0xaa, 0x07, + 0xf9, 0x66, 0xb7, 0xff, 0xf7, 0x12, 0x69, 0xdd, 0xde, 0x81, 0xdf, 0x60, 0x1a, 0xbe, 0xcb, 0xb5, + 0xcf, 0x72, 0x27, 0x20, 0xaa, 0xb2, 0x5c, 0xfb, 0x8f, 0xa7, 0x13, 0x00, 0x5d, 0x2a, 0xeb, 0xbf, + 0xe6, 0x0d, 0xee, 0x04, 0x28, 0x2b, 0xb3, 0x3e, 0x77, 0x3f, 0xa4, 0x8a, 0x2f, 0x58, 0x2f, 0xc3, + 0x09, 0xfc, 0xe9, 0x62, 0x20, 0xab, 0xdc, 0x09, 0xf0, 0x95, 0x81, 0x5e, 0x89, 0xb1, 0x6b, 0x70, + 0x5c, 0xef, 0x0b, 0x6f, 0x97, 0x1f, 0x01, 0x94, 0x90, 0xda, 0xe4, 0xcd, 0xc9, 0x94, 0x8f, 0xba, + 0x1f, 0xd1, 0x26, 0xa9, 0x1f, 0xbc, 0x11, 0x27, 0xd3, 0x93, 0x51, 0xb7, 0x44, 0x1b, 0xa4, 0x3a, + 0xe3, 0xd3, 0xd9, 0xbc, 0x5b, 0xa6, 0x01, 0xa9, 0xcc, 0xa7, 0xe3, 0xd3, 0xee, 0x16, 0xac, 0xc6, + 0x67, 0x47, 0x47, 0xdd, 0x0a, 0x9c, 0x9b, 0x9f, 0xf2, 0xc9, 0xc1, 0x69, 0xb7, 0x0a, 0xe7, 0x0e, + 0x47, 0xe3, 0xd7, 0x67, 0x47, 0xa7, 0xdd, 0xda, 0xfe, 0x3f, 0x4a, 0xbe, 0x04, 0x37, 0x99, 0x05, + 0x37, 0x8d, 0x8e, 0x67, 0xa7, 0x3f, 0x76, 0x3f, 0x82, 0xf3, 0x87, 0x67, 0xc7, 0xb3, 0x6e, 0x09, + 0xce, 0xf0, 0xd1, 0xfc, 0x14, 0x1e, 0x2e, 0x83, 0xc6, 0xc1, 0x5f, 0x46, 0x07, 0x3f, 0x74, 0xb7, + 0x68, 0x8b, 0x04, 0x33, 0x3e, 0x12, 0xa8, 0x55, 0xa1, 0xf7, 0x48, 0x73, 0xf6, 0xfa, 0xcd, 0x48, + 0xcc, 0x47, 0xfc, 0xed, 0x88, 0x77, 0xab, 0xf0, 0xec, 0xc9, 0xf4, 0x74, 0x32, 0xfe, 0xb1, 0x5b, + 0xa3, 0x5d, 0xd2, 0x3a, 0x98, 0x9d, 0x4d, 0x4e, 0xc6, 0x53, 0xa7, 0x5e, 0xa7, 0xdb, 0xa4, 0xbd, + 0x41, 0xdc, 0x7d, 0x01, 0x40, 0xe3, 0xd1, 0xeb, 0xd3, 0x33, 0x3e, 0xf2, 0x50, 0x03, 0x9e, 0x7e, + 0x3b, 0xe2, 0xf3, 0xc9, 0xf4, 0xa4, 0x4b, 0xfe, 0x13, 0x00, 0x00, 0xff, 0xff, 0x5f, 0x2a, 0xaf, + 0x49, 0x5b, 0x0d, 0x00, 0x00, +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.proto b/vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.proto new file mode 100644 index 000000000..48e42e26e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.proto @@ -0,0 +1,209 @@ +syntax = "proto2"; + +message criu_page_server_info { + optional string address = 1; + optional int32 port = 2; + optional int32 pid = 3; + optional int32 fd = 4; +} + +message criu_veth_pair { + required string if_in = 1; + required string if_out = 2; +}; + +message ext_mount_map { + required string key = 1; + required string val = 2; +}; + +message join_namespace { + required string ns = 1; + required string ns_file = 2; + optional string extra_opt = 3; +} + +message inherit_fd { + required string key = 1; + required int32 fd = 2; +}; + +message cgroup_root { + optional string ctrl = 1; + required string path = 2; +}; + +message unix_sk { + required uint32 inode = 1; +}; + +enum criu_cg_mode { + IGNORE = 0; + CG_NONE = 1; + PROPS = 2; + SOFT = 3; + FULL = 4; + STRICT = 5; + DEFAULT = 6; +}; + +message criu_opts { + required int32 images_dir_fd = 1; + optional int32 pid = 2; /* if not set on dump, will dump requesting process */ + + optional bool leave_running = 3; + optional bool ext_unix_sk = 4; + optional bool tcp_established = 5; + optional bool evasive_devices = 6; + optional bool shell_job = 7; + optional bool file_locks = 8; + optional int32 log_level = 9 [default = 2]; + optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */ + + optional criu_page_server_info ps = 11; + + optional bool notify_scripts = 12; + + optional string root = 13; + optional string parent_img = 14; + optional bool track_mem = 15; + optional bool auto_dedup = 16; + + optional int32 work_dir_fd = 17; + optional bool link_remap = 18; + repeated criu_veth_pair veths = 19; /* DEPRECATED, use external instead */ + + optional uint32 cpu_cap = 20 [default = 0xffffffff]; + optional bool force_irmap = 21; + repeated string exec_cmd = 22; + + repeated ext_mount_map ext_mnt = 23; /* DEPRECATED, use external instead */ + optional bool manage_cgroups = 24; /* backward compatibility */ + repeated cgroup_root cg_root = 25; + + optional bool rst_sibling = 26; /* swrk only */ + repeated inherit_fd inherit_fd = 27; /* swrk only */ + + optional bool auto_ext_mnt = 28; + optional bool ext_sharing = 29; + optional bool ext_masters = 30; + + repeated string skip_mnt = 31; + repeated string enable_fs = 32; + + repeated unix_sk unix_sk_ino = 33; /* DEPRECATED, use external instead */ + + optional criu_cg_mode manage_cgroups_mode = 34; + optional uint32 ghost_limit = 35 [default = 0x100000]; + repeated string irmap_scan_paths = 36; + repeated string external = 37; + optional uint32 empty_ns = 38; + repeated join_namespace join_ns = 39; + + optional string cgroup_props = 41; + optional string cgroup_props_file = 42; + repeated string cgroup_dump_controller = 43; + + optional string freeze_cgroup = 44; + optional uint32 timeout = 45; + optional bool tcp_skip_in_flight = 46; + optional bool weak_sysctls = 47; + optional bool lazy_pages = 48; + optional int32 status_fd = 49; + optional bool orphan_pts_master = 50; +} + +message criu_dump_resp { + optional bool restored = 1; +} + +message criu_restore_resp { + required int32 pid = 1; +} + +message criu_notify { + optional string script = 1; + optional int32 pid = 2; +} + +enum criu_req_type { + EMPTY = 0; + DUMP = 1; + RESTORE = 2; + CHECK = 3; + PRE_DUMP = 4; + PAGE_SERVER = 5; + + NOTIFY = 6; + + CPUINFO_DUMP = 7; + CPUINFO_CHECK = 8; + + FEATURE_CHECK = 9; + + VERSION = 10; +} + +/* + * List of features which can queried via + * CRIU_REQ_TYPE__FEATURE_CHECK + */ +message criu_features { + optional bool mem_track = 1; + optional bool lazy_pages = 2; +} + +/* + * Request -- each type corresponds to must-be-there + * request arguments of respective type + */ + +message criu_req { + required criu_req_type type = 1; + + optional criu_opts opts = 2; + optional bool notify_success = 3; + + /* + * When set service won't close the connection but + * will wait for more req-s to appear. Works not + * for all request types. + */ + optional bool keep_open = 4; + /* + * 'features' can be used to query which features + * are supported by the installed criu/kernel + * via RPC. + */ + optional criu_features features = 5; +} + +/* + * Response -- it states whether the request was served + * and additional request-specific information + */ + +message criu_resp { + required criu_req_type type = 1; + required bool success = 2; + + optional criu_dump_resp dump = 3; + optional criu_restore_resp restore = 4; + optional criu_notify notify = 5; + optional criu_page_server_info ps = 6; + + optional int32 cr_errno = 7; + optional criu_features features = 8; + optional string cr_errmsg = 9; + optional criu_version version = 10; +} + +/* Answer for criu_req_type.VERSION requests */ +message criu_version { + required int32 major = 1; + required int32 minor = 2; + optional string gitid = 3; + optional int32 sublevel = 4; + optional int32 extra = 5; + optional string name = 6; +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_linux.go new file mode 100644 index 000000000..461dc097c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_linux.go @@ -0,0 +1,100 @@ +package devices + +import ( + "errors" + "io/ioutil" + "os" + "path/filepath" + + "github.com/opencontainers/runc/libcontainer/configs" + + "golang.org/x/sys/unix" +) + +var ( + ErrNotADevice = errors.New("not a device node") +) + +// Testing dependencies +var ( + unixLstat = unix.Lstat + ioutilReadDir = ioutil.ReadDir +) + +// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the information about a linux device and return that information as a Device struct. +func DeviceFromPath(path, permissions string) (*configs.Device, error) { + var stat unix.Stat_t + err := unixLstat(path, &stat) + if err != nil { + return nil, err + } + var ( + devType rune + mode = stat.Mode + ) + switch { + case mode&unix.S_IFBLK == unix.S_IFBLK: + devType = 'b' + case mode&unix.S_IFCHR == unix.S_IFCHR: + devType = 'c' + default: + return nil, ErrNotADevice + } + devNumber := int(stat.Rdev) + uid := stat.Uid + gid := stat.Gid + return &configs.Device{ + Type: devType, + Path: path, + Major: Major(devNumber), + Minor: Minor(devNumber), + Permissions: permissions, + FileMode: os.FileMode(mode), + Uid: uid, + Gid: gid, + }, nil +} + +func HostDevices() ([]*configs.Device, error) { + return getDevices("/dev") +} + +func getDevices(path string) ([]*configs.Device, error) { + files, err := ioutilReadDir(path) + if err != nil { + return nil, err + } + out := []*configs.Device{} + for _, f := range files { + switch { + case f.IsDir(): + switch f.Name() { + // ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825 + case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts": + continue + default: + sub, err := getDevices(filepath.Join(path, f.Name())) + if err != nil { + return nil, err + } + + out = append(out, sub...) + continue + } + case f.Name() == "console": + continue + } + device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") + if err != nil { + if err == ErrNotADevice { + continue + } + if os.IsNotExist(err) { + continue + } + return nil, err + } + out = append(out, device) + } + return out, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_unsupported.go new file mode 100644 index 000000000..6649b9f2d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package devices diff --git a/vendor/github.com/opencontainers/runc/libcontainer/devices/number.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/number.go new file mode 100644 index 000000000..885b6e5dd --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/number.go @@ -0,0 +1,24 @@ +// +build linux freebsd + +package devices + +/* + +This code provides support for manipulating linux device numbers. It should be replaced by normal syscall functions once http://code.google.com/p/go/issues/detail?id=8106 is solved. + +You can read what they are here: + + - http://www.makelinux.net/ldd3/chp-3-sect-2 + - http://www.linux-tutorial.info/modules.php?name=MContent&pageid=94 + +Note! These are NOT the same as the MAJOR(dev_t device);, MINOR(dev_t device); and MKDEV(int major, int minor); functions as defined in <linux/kdev_t.h> as the representation of device numbers used by go is different than the one used internally to the kernel! - https://github.com/torvalds/linux/blob/master/include/linux/kdev_t.h#L9 + +*/ + +func Major(devNumber int) int64 { + return int64((devNumber >> 8) & 0xfff) +} + +func Minor(devNumber int) int64 { + return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00)) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/error.go b/vendor/github.com/opencontainers/runc/libcontainer/error.go new file mode 100644 index 000000000..21a3789ba --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/error.go @@ -0,0 +1,70 @@ +package libcontainer + +import "io" + +// ErrorCode is the API error code type. +type ErrorCode int + +// API error codes. +const ( + // Factory errors + IdInUse ErrorCode = iota + InvalidIdFormat + + // Container errors + ContainerNotExists + ContainerPaused + ContainerNotStopped + ContainerNotRunning + ContainerNotPaused + + // Process errors + NoProcessOps + + // Common errors + ConfigInvalid + ConsoleExists + SystemError +) + +func (c ErrorCode) String() string { + switch c { + case IdInUse: + return "Id already in use" + case InvalidIdFormat: + return "Invalid format" + case ContainerPaused: + return "Container paused" + case ConfigInvalid: + return "Invalid configuration" + case SystemError: + return "System error" + case ContainerNotExists: + return "Container does not exist" + case ContainerNotStopped: + return "Container is not stopped" + case ContainerNotRunning: + return "Container is not running" + case ConsoleExists: + return "Console exists for process" + case ContainerNotPaused: + return "Container is not paused" + case NoProcessOps: + return "No process operations" + default: + return "Unknown error" + } +} + +// Error is the API error type. +type Error interface { + error + + // Returns an error if it failed to write the detail of the Error to w. + // The detail of the Error may include the error message and a + // representation of the stack trace. + Detail(w io.Writer) error + + // Returns the error code for this error. + Code() ErrorCode +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/factory.go b/vendor/github.com/opencontainers/runc/libcontainer/factory.go new file mode 100644 index 000000000..0986cd77e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/factory.go @@ -0,0 +1,44 @@ +package libcontainer + +import ( + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Factory interface { + // Creates a new container with the given id and starts the initial process inside it. + // id must be a string containing only letters, digits and underscores and must contain + // between 1 and 1024 characters, inclusive. + // + // The id must not already be in use by an existing container. Containers created using + // a factory with the same path (and filesystem) must have distinct ids. + // + // Returns the new container with a running process. + // + // errors: + // IdInUse - id is already in use by a container + // InvalidIdFormat - id has incorrect format + // ConfigInvalid - config is invalid + // Systemerror - System error + // + // On error, any partially created container parts are cleaned up (the operation is atomic). + Create(id string, config *configs.Config) (Container, error) + + // Load takes an ID for an existing container and returns the container information + // from the state. This presents a read only view of the container. + // + // errors: + // Path does not exist + // System error + Load(id string) (Container, error) + + // StartInitialization is an internal API to libcontainer used during the reexec of the + // container. + // + // Errors: + // Pipe connection error + // System error + StartInitialization() error + + // Type returns info string about factory type (e.g. lxc, libcontainer...) + Type() string +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go new file mode 100644 index 000000000..42b6f5a05 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go @@ -0,0 +1,325 @@ +// +build linux + +package libcontainer + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "runtime/debug" + "strconv" + + "github.com/docker/docker/pkg/mount" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/cgroups/rootless" + "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/configs/validate" + "github.com/opencontainers/runc/libcontainer/utils" + + "golang.org/x/sys/unix" +) + +const ( + stateFilename = "state.json" + execFifoFilename = "exec.fifo" +) + +var idRegex = regexp.MustCompile(`^[\w+-\.]+$`) + +// InitArgs returns an options func to configure a LinuxFactory with the +// provided init binary path and arguments. +func InitArgs(args ...string) func(*LinuxFactory) error { + return func(l *LinuxFactory) (err error) { + if len(args) > 0 { + // Resolve relative paths to ensure that its available + // after directory changes. + if args[0], err = filepath.Abs(args[0]); err != nil { + return newGenericError(err, ConfigInvalid) + } + } + + l.InitArgs = args + return nil + } +} + +// SystemdCgroups is an options func to configure a LinuxFactory to return +// containers that use systemd to create and manage cgroups. +func SystemdCgroups(l *LinuxFactory) error { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &systemd.Manager{ + Cgroups: config, + Paths: paths, + } + } + return nil +} + +// Cgroupfs is an options func to configure a LinuxFactory to return +// containers that use the native cgroups filesystem implementation to +// create and manage cgroups. +func Cgroupfs(l *LinuxFactory) error { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &fs.Manager{ + Cgroups: config, + Paths: paths, + } + } + return nil +} + +// RootlessCgroups is an options func to configure a LinuxFactory to +// return containers that use the "rootless" cgroup manager, which will +// fail to do any operations not possible to do with an unprivileged user. +// It should only be used in conjunction with rootless containers. +func RootlessCgroups(l *LinuxFactory) error { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &rootless.Manager{ + Cgroups: config, + Paths: paths, + } + } + return nil +} + +// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. +func TmpfsRoot(l *LinuxFactory) error { + mounted, err := mount.Mounted(l.Root) + if err != nil { + return err + } + if !mounted { + if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil { + return err + } + } + return nil +} + +// CriuPath returns an option func to configure a LinuxFactory with the +// provided criupath +func CriuPath(criupath string) func(*LinuxFactory) error { + return func(l *LinuxFactory) error { + l.CriuPath = criupath + return nil + } +} + +// New returns a linux based container factory based in the root directory and +// configures the factory with the provided option funcs. +func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { + if root != "" { + if err := os.MkdirAll(root, 0700); err != nil { + return nil, newGenericError(err, SystemError) + } + } + l := &LinuxFactory{ + Root: root, + InitArgs: []string{"/proc/self/exe", "init"}, + Validator: validate.New(), + CriuPath: "criu", + } + Cgroupfs(l) + for _, opt := range options { + if err := opt(l); err != nil { + return nil, err + } + } + return l, nil +} + +// LinuxFactory implements the default factory interface for linux based systems. +type LinuxFactory struct { + // Root directory for the factory to store state. + Root string + + // InitArgs are arguments for calling the init responsibilities for spawning + // a container. + InitArgs []string + + // CriuPath is the path to the criu binary used for checkpoint and restore of + // containers. + CriuPath string + + // Validator provides validation to container configurations. + Validator validate.Validator + + // NewCgroupsManager returns an initialized cgroups manager for a single container. + NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager +} + +func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { + if l.Root == "" { + return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) + } + if err := l.validateID(id); err != nil { + return nil, err + } + if err := l.Validator.Validate(config); err != nil { + return nil, newGenericError(err, ConfigInvalid) + } + containerRoot := filepath.Join(l.Root, id) + if _, err := os.Stat(containerRoot); err == nil { + return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse) + } else if !os.IsNotExist(err) { + return nil, newGenericError(err, SystemError) + } + if err := os.MkdirAll(containerRoot, 0711); err != nil { + return nil, newGenericError(err, SystemError) + } + if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil { + return nil, newGenericError(err, SystemError) + } + if config.Rootless { + RootlessCgroups(l) + } + c := &linuxContainer{ + id: id, + root: containerRoot, + config: config, + initArgs: l.InitArgs, + criuPath: l.CriuPath, + cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), + } + c.state = &stoppedState{c: c} + return c, nil +} + +func (l *LinuxFactory) Load(id string) (Container, error) { + if l.Root == "" { + return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) + } + containerRoot := filepath.Join(l.Root, id) + state, err := l.loadState(containerRoot, id) + if err != nil { + return nil, err + } + r := &nonChildProcess{ + processPid: state.InitProcessPid, + processStartTime: state.InitProcessStartTime, + fds: state.ExternalDescriptors, + } + // We have to use the RootlessManager. + if state.Rootless { + RootlessCgroups(l) + } + c := &linuxContainer{ + initProcess: r, + initProcessStartTime: state.InitProcessStartTime, + id: id, + config: &state.Config, + initArgs: l.InitArgs, + criuPath: l.CriuPath, + cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), + root: containerRoot, + created: state.Created, + } + c.state = &loadedState{c: c} + if err := c.refreshState(); err != nil { + return nil, err + } + return c, nil +} + +func (l *LinuxFactory) Type() string { + return "libcontainer" +} + +// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state +// This is a low level implementation detail of the reexec and should not be consumed externally +func (l *LinuxFactory) StartInitialization() (err error) { + var ( + pipefd, rootfd int + consoleSocket *os.File + envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE") + envStateDir = os.Getenv("_LIBCONTAINER_STATEDIR") + envConsole = os.Getenv("_LIBCONTAINER_CONSOLE") + ) + + // Get the INITPIPE. + pipefd, err = strconv.Atoi(envInitPipe) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err) + } + + var ( + pipe = os.NewFile(uintptr(pipefd), "pipe") + it = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) + ) + defer pipe.Close() + + // Only init processes have STATEDIR. + rootfd = -1 + if it == initStandard { + if rootfd, err = strconv.Atoi(envStateDir); err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_STATEDIR=%s to int: %s", envStateDir, err) + } + } + + if envConsole != "" { + console, err := strconv.Atoi(envConsole) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err) + } + consoleSocket = os.NewFile(uintptr(console), "console-socket") + defer consoleSocket.Close() + } + + // clear the current process's environment to clean any libcontainer + // specific env vars. + os.Clearenv() + + defer func() { + // We have an error during the initialization of the container's init, + // send it back to the parent process in the form of an initError. + if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil { + fmt.Fprintln(os.Stderr, err) + return + } + if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil { + fmt.Fprintln(os.Stderr, err) + return + } + }() + defer func() { + if e := recover(); e != nil { + err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack())) + } + }() + + i, err := newContainerInit(it, pipe, consoleSocket, rootfd) + if err != nil { + return err + } + + // If Init succeeds, syscall.Exec will not return, hence none of the defers will be called. + return i.Init() +} + +func (l *LinuxFactory) loadState(root, id string) (*State, error) { + f, err := os.Open(filepath.Join(root, stateFilename)) + if err != nil { + if os.IsNotExist(err) { + return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists) + } + return nil, newGenericError(err, SystemError) + } + defer f.Close() + var state *State + if err := json.NewDecoder(f).Decode(&state); err != nil { + return nil, newGenericError(err, SystemError) + } + return state, nil +} + +func (l *LinuxFactory) validateID(id string) error { + if !idRegex.MatchString(id) { + return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat) + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go b/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go new file mode 100644 index 000000000..6e7de2fe7 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go @@ -0,0 +1,92 @@ +package libcontainer + +import ( + "fmt" + "io" + "text/template" + "time" + + "github.com/opencontainers/runc/libcontainer/stacktrace" +) + +var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} +Code: {{.ECode}} +{{if .Message }} +Message: {{.Message}} +{{end}} +Frames:{{range $i, $frame := .Stack.Frames}} +--- +{{$i}}: {{$frame.Function}} +Package: {{$frame.Package}} +File: {{$frame.File}}@{{$frame.Line}}{{end}} +`)) + +func newGenericError(err error, c ErrorCode) Error { + if le, ok := err.(Error); ok { + return le + } + gerr := &genericError{ + Timestamp: time.Now(), + Err: err, + ECode: c, + Stack: stacktrace.Capture(1), + } + if err != nil { + gerr.Message = err.Error() + } + return gerr +} + +func newSystemError(err error) Error { + return createSystemError(err, "") +} + +func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error { + return createSystemError(err, fmt.Sprintf(cause, v...)) +} + +func newSystemErrorWithCause(err error, cause string) Error { + return createSystemError(err, cause) +} + +// createSystemError creates the specified error with the correct number of +// stack frames skipped. This is only to be called by the other functions for +// formatting the error. +func createSystemError(err error, cause string) Error { + gerr := &genericError{ + Timestamp: time.Now(), + Err: err, + ECode: SystemError, + Cause: cause, + Stack: stacktrace.Capture(2), + } + if err != nil { + gerr.Message = err.Error() + } + return gerr +} + +type genericError struct { + Timestamp time.Time + ECode ErrorCode + Err error `json:"-"` + Cause string + Message string + Stack stacktrace.Stacktrace +} + +func (e *genericError) Error() string { + if e.Cause == "" { + return e.Message + } + frame := e.Stack.Frames[0] + return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message) +} + +func (e *genericError) Code() ErrorCode { + return e.ECode +} + +func (e *genericError) Detail(w io.Writer) error { + return errorTemplate.Execute(w, e) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go new file mode 100644 index 000000000..63afd28eb --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go @@ -0,0 +1,502 @@ +// +build linux + +package libcontainer + +import ( + "encoding/json" + "fmt" + "io" + "net" + "os" + "strings" + "syscall" // only for Errno + "unsafe" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/user" + "github.com/opencontainers/runc/libcontainer/utils" + + "github.com/sirupsen/logrus" + "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" +) + +type initType string + +const ( + initSetns initType = "setns" + initStandard initType = "standard" +) + +type pid struct { + Pid int `json:"pid"` +} + +// network is an internal struct used to setup container networks. +type network struct { + configs.Network + + // TempVethPeerName is a unique temporary veth peer name that was placed into + // the container's namespace. + TempVethPeerName string `json:"temp_veth_peer_name"` +} + +// initConfig is used for transferring parameters from Exec() to Init() +type initConfig struct { + Args []string `json:"args"` + Env []string `json:"env"` + Cwd string `json:"cwd"` + Capabilities *configs.Capabilities `json:"capabilities"` + ProcessLabel string `json:"process_label"` + AppArmorProfile string `json:"apparmor_profile"` + NoNewPrivileges bool `json:"no_new_privileges"` + User string `json:"user"` + AdditionalGroups []string `json:"additional_groups"` + Config *configs.Config `json:"config"` + Networks []*network `json:"network"` + PassedFilesCount int `json:"passed_files_count"` + ContainerId string `json:"containerid"` + Rlimits []configs.Rlimit `json:"rlimits"` + CreateConsole bool `json:"create_console"` + Rootless bool `json:"rootless"` +} + +type initer interface { + Init() error +} + +func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, stateDirFD int) (initer, error) { + var config *initConfig + if err := json.NewDecoder(pipe).Decode(&config); err != nil { + return nil, err + } + if err := populateProcessEnvironment(config.Env); err != nil { + return nil, err + } + switch t { + case initSetns: + return &linuxSetnsInit{ + pipe: pipe, + consoleSocket: consoleSocket, + config: config, + }, nil + case initStandard: + return &linuxStandardInit{ + pipe: pipe, + consoleSocket: consoleSocket, + parentPid: unix.Getppid(), + config: config, + stateDirFD: stateDirFD, + }, nil + } + return nil, fmt.Errorf("unknown init type %q", t) +} + +// populateProcessEnvironment loads the provided environment variables into the +// current processes's environment. +func populateProcessEnvironment(env []string) error { + for _, pair := range env { + p := strings.SplitN(pair, "=", 2) + if len(p) < 2 { + return fmt.Errorf("invalid environment '%v'", pair) + } + if err := os.Setenv(p[0], p[1]); err != nil { + return err + } + } + return nil +} + +// finalizeNamespace drops the caps, sets the correct user +// and working dir, and closes any leaked file descriptors +// before executing the command inside the namespace +func finalizeNamespace(config *initConfig) error { + // Ensure that all unwanted fds we may have accidentally + // inherited are marked close-on-exec so they stay out of the + // container + if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil { + return err + } + + capabilities := &configs.Capabilities{} + if config.Capabilities != nil { + capabilities = config.Capabilities + } else if config.Config.Capabilities != nil { + capabilities = config.Config.Capabilities + } + w, err := newContainerCapList(capabilities) + if err != nil { + return err + } + // drop capabilities in bounding set before changing user + if err := w.ApplyBoundingSet(); err != nil { + return err + } + // preserve existing capabilities while we change users + if err := system.SetKeepCaps(); err != nil { + return err + } + if err := setupUser(config); err != nil { + return err + } + if err := system.ClearKeepCaps(); err != nil { + return err + } + if err := w.ApplyCaps(); err != nil { + return err + } + if config.Cwd != "" { + if err := unix.Chdir(config.Cwd); err != nil { + return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err) + } + } + return nil +} + +// setupConsole sets up the console from inside the container, and sends the +// master pty fd to the config.Pipe (using cmsg). This is done to ensure that +// consoles are scoped to a container properly (see runc#814 and the many +// issues related to that). This has to be run *after* we've pivoted to the new +// rootfs (and the users' configuration is entirely set up). +func setupConsole(socket *os.File, config *initConfig, mount bool) error { + defer socket.Close() + // At this point, /dev/ptmx points to something that we would expect. We + // used to change the owner of the slave path, but since the /dev/pts mount + // can have gid=X set (at the users' option). So touching the owner of the + // slave PTY is not necessary, as the kernel will handle that for us. Note + // however, that setupUser (specifically fixStdioPermissions) *will* change + // the UID owner of the console to be the user the process will run as (so + // they can actually control their console). + console, err := newConsole() + if err != nil { + return err + } + // After we return from here, we don't need the console anymore. + defer console.Close() + + linuxConsole, ok := console.(*linuxConsole) + if !ok { + return fmt.Errorf("failed to cast console to *linuxConsole") + } + // Mount the console inside our rootfs. + if mount { + if err := linuxConsole.mount(); err != nil { + return err + } + } + // While we can access console.master, using the API is a good idea. + if err := utils.SendFd(socket, linuxConsole.File()); err != nil { + return err + } + // Now, dup over all the things. + return linuxConsole.dupStdio() +} + +// syncParentReady sends to the given pipe a JSON payload which indicates that +// the init is ready to Exec the child process. It then waits for the parent to +// indicate that it is cleared to Exec. +func syncParentReady(pipe io.ReadWriter) error { + // Tell parent. + if err := writeSync(pipe, procReady); err != nil { + return err + } + + // Wait for parent to give the all-clear. + if err := readSync(pipe, procRun); err != nil { + return err + } + + return nil +} + +// syncParentHooks sends to the given pipe a JSON payload which indicates that +// the parent should execute pre-start hooks. It then waits for the parent to +// indicate that it is cleared to resume. +func syncParentHooks(pipe io.ReadWriter) error { + // Tell parent. + if err := writeSync(pipe, procHooks); err != nil { + return err + } + + // Wait for parent to give the all-clear. + if err := readSync(pipe, procResume); err != nil { + return err + } + + return nil +} + +// setupUser changes the groups, gid, and uid for the user inside the container +func setupUser(config *initConfig) error { + // Set up defaults. + defaultExecUser := user.ExecUser{ + Uid: 0, + Gid: 0, + Home: "/", + } + + passwdPath, err := user.GetPasswdPath() + if err != nil { + return err + } + + groupPath, err := user.GetGroupPath() + if err != nil { + return err + } + + execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) + if err != nil { + return err + } + + var addGroups []int + if len(config.AdditionalGroups) > 0 { + addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath) + if err != nil { + return err + } + } + + if config.Rootless { + if execUser.Uid != 0 { + return fmt.Errorf("cannot run as a non-root user in a rootless container") + } + + if execUser.Gid != 0 { + return fmt.Errorf("cannot run as a non-root group in a rootless container") + } + + // We cannot set any additional groups in a rootless container and thus we + // bail if the user asked us to do so. TODO: We currently can't do this + // earlier, but if libcontainer.Process.User was typesafe this might work. + if len(addGroups) > 0 { + return fmt.Errorf("cannot set any additional groups in a rootless container") + } + } + + // before we change to the container's user make sure that the processes STDIO + // is correctly owned by the user that we are switching to. + if err := fixStdioPermissions(config, execUser); err != nil { + return err + } + + // This isn't allowed in an unprivileged user namespace since Linux 3.19. + // There's nothing we can do about /etc/group entries, so we silently + // ignore setting groups here (since the user didn't explicitly ask us to + // set the group). + if !config.Rootless { + suppGroups := append(execUser.Sgids, addGroups...) + if err := unix.Setgroups(suppGroups); err != nil { + return err + } + } + + if err := system.Setgid(execUser.Gid); err != nil { + return err + } + + if err := system.Setuid(execUser.Uid); err != nil { + return err + } + + // if we didn't get HOME already, set it based on the user's HOME + if envHome := os.Getenv("HOME"); envHome == "" { + if err := os.Setenv("HOME", execUser.Home); err != nil { + return err + } + } + return nil +} + +// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. +// The ownership needs to match because it is created outside of the container and needs to be +// localized. +func fixStdioPermissions(config *initConfig, u *user.ExecUser) error { + var null unix.Stat_t + if err := unix.Stat("/dev/null", &null); err != nil { + return err + } + for _, fd := range []uintptr{ + os.Stdin.Fd(), + os.Stderr.Fd(), + os.Stdout.Fd(), + } { + var s unix.Stat_t + if err := unix.Fstat(int(fd), &s); err != nil { + return err + } + + // Skip chown of /dev/null if it was used as one of the STDIO fds. + if s.Rdev == null.Rdev { + continue + } + + // Skip chown if s.Gid is actually an unmapped gid in the host. While + // this is a bit dodgy if it just so happens that the console _is_ + // owned by overflow_gid, there's no way for us to disambiguate this as + // a userspace program. + if _, err := config.Config.HostGID(int(s.Gid)); err != nil { + continue + } + + // We only change the uid owner (as it is possible for the mount to + // prefer a different gid, and there's no reason for us to change it). + // The reason why we don't just leave the default uid=X mount setup is + // that users expect to be able to actually use their console. Without + // this code, you couldn't effectively run as a non-root user inside a + // container and also have a console set up. + if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil { + return err + } + } + return nil +} + +// setupNetwork sets up and initializes any network interface inside the container. +func setupNetwork(config *initConfig) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + if err := strategy.initialize(config); err != nil { + return err + } + } + return nil +} + +func setupRoute(config *configs.Config) error { + for _, config := range config.Routes { + _, dst, err := net.ParseCIDR(config.Destination) + if err != nil { + return err + } + src := net.ParseIP(config.Source) + if src == nil { + return fmt.Errorf("Invalid source for route: %s", config.Source) + } + gw := net.ParseIP(config.Gateway) + if gw == nil { + return fmt.Errorf("Invalid gateway for route: %s", config.Gateway) + } + l, err := netlink.LinkByName(config.InterfaceName) + if err != nil { + return err + } + route := &netlink.Route{ + Scope: netlink.SCOPE_UNIVERSE, + Dst: dst, + Src: src, + Gw: gw, + LinkIndex: l.Attrs().Index, + } + if err := netlink.RouteAdd(route); err != nil { + return err + } + } + return nil +} + +func setupRlimits(limits []configs.Rlimit, pid int) error { + for _, rlimit := range limits { + if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil { + return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) + } + } + return nil +} + +const _P_PID = 1 + +type siginfo struct { + si_signo int32 + si_errno int32 + si_code int32 + // below here is a union; si_pid is the only field we use + si_pid int32 + // Pad to 128 bytes as detailed in blockUntilWaitable + pad [96]byte +} + +// isWaitable returns true if the process has exited false otherwise. +// Its based off blockUntilWaitable in src/os/wait_waitid.go +func isWaitable(pid int) (bool, error) { + si := &siginfo{} + _, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0) + if e != 0 { + return false, os.NewSyscallError("waitid", e) + } + + return si.si_pid != 0, nil +} + +// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise +func isNoChildren(err error) bool { + switch err := err.(type) { + case syscall.Errno: + if err == unix.ECHILD { + return true + } + case *os.SyscallError: + if err.Err == unix.ECHILD { + return true + } + } + return false +} + +// signalAllProcesses freezes then iterates over all the processes inside the +// manager's cgroups sending the signal s to them. +// If s is SIGKILL then it will wait for each process to exit. +// For all other signals it will check if the process is ready to report its +// exit status and only if it is will a wait be performed. +func signalAllProcesses(m cgroups.Manager, s os.Signal) error { + var procs []*os.Process + if err := m.Freeze(configs.Frozen); err != nil { + logrus.Warn(err) + } + pids, err := m.GetAllPids() + if err != nil { + m.Freeze(configs.Thawed) + return err + } + for _, pid := range pids { + p, err := os.FindProcess(pid) + if err != nil { + logrus.Warn(err) + continue + } + procs = append(procs, p) + if err := p.Signal(s); err != nil { + logrus.Warn(err) + } + } + if err := m.Freeze(configs.Thawed); err != nil { + logrus.Warn(err) + } + + for _, p := range procs { + if s != unix.SIGKILL { + if ok, err := isWaitable(p.Pid); err != nil { + if !isNoChildren(err) { + logrus.Warn("signalAllProcesses: ", p.Pid, err) + } + continue + } else if !ok { + // Not ready to report so don't wait + continue + } + } + + if _, err := p.Wait(); err != nil { + if !isNoChildren(err) { + logrus.Warn("wait: ", err) + } + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go new file mode 100644 index 000000000..82ffa7a88 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go @@ -0,0 +1,50 @@ +// +build linux + +package keys + +import ( + "fmt" + "strconv" + "strings" + + "golang.org/x/sys/unix" +) + +type KeySerial uint32 + +func JoinSessionKeyring(name string) (KeySerial, error) { + sessKeyId, err := unix.KeyctlJoinSessionKeyring(name) + if err != nil { + return 0, fmt.Errorf("could not create session key: %v", err) + } + return KeySerial(sessKeyId), nil +} + +// ModKeyringPerm modifies permissions on a keyring by reading the current permissions, +// anding the bits with the given mask (clearing permissions) and setting +// additional permission bits +func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error { + dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringId)) + if err != nil { + return err + } + + res := strings.Split(string(dest), ";") + if len(res) < 5 { + return fmt.Errorf("Destination buffer for key description is too small") + } + + // parse permissions + perm64, err := strconv.ParseUint(res[3], 16, 32) + if err != nil { + return err + } + + perm := (uint32(perm64) & mask) | setbits + + if err := unix.KeyctlSetperm(int(ringId), perm); err != nil { + return err + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go new file mode 100644 index 000000000..8829b71ad --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go @@ -0,0 +1,87 @@ +// +build linux + +package libcontainer + +import ( + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// list of known message types we want to send to bootstrap program +// The number is randomly chosen to not conflict with known netlink types +const ( + InitMsg uint16 = 62000 + CloneFlagsAttr uint16 = 27281 + NsPathsAttr uint16 = 27282 + UidmapAttr uint16 = 27283 + GidmapAttr uint16 = 27284 + SetgroupAttr uint16 = 27285 + OomScoreAdjAttr uint16 = 27286 + RootlessAttr uint16 = 27287 +) + +type Int32msg struct { + Type uint16 + Value uint32 +} + +// Serialize serializes the message. +// Int32msg has the following representation +// | nlattr len | nlattr type | +// | uint32 value | +func (msg *Int32msg) Serialize() []byte { + buf := make([]byte, msg.Len()) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(msg.Len())) + native.PutUint16(buf[2:4], msg.Type) + native.PutUint32(buf[4:8], msg.Value) + return buf +} + +func (msg *Int32msg) Len() int { + return unix.NLA_HDRLEN + 4 +} + +// Bytemsg has the following representation +// | nlattr len | nlattr type | +// | value | pad | +type Bytemsg struct { + Type uint16 + Value []byte +} + +func (msg *Bytemsg) Serialize() []byte { + l := msg.Len() + buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1)) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(l)) + native.PutUint16(buf[2:4], msg.Type) + copy(buf[4:], msg.Value) + return buf +} + +func (msg *Bytemsg) Len() int { + return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated +} + +type Boolmsg struct { + Type uint16 + Value bool +} + +func (msg *Boolmsg) Serialize() []byte { + buf := make([]byte, msg.Len()) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(msg.Len())) + native.PutUint16(buf[2:4], msg.Type) + if msg.Value { + buf[4] = 1 + } else { + buf[4] = 0 + } + return buf +} + +func (msg *Boolmsg) Len() int { + return unix.NLA_HDRLEN + 1 +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go new file mode 100644 index 000000000..5075bee4d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go @@ -0,0 +1,259 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "io/ioutil" + "net" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/vishvananda/netlink" +) + +var strategies = map[string]networkStrategy{ + "veth": &veth{}, + "loopback": &loopback{}, +} + +// networkStrategy represents a specific network configuration for +// a container's networking stack +type networkStrategy interface { + create(*network, int) error + initialize(*network) error + detach(*configs.Network) error + attach(*configs.Network) error +} + +// getStrategy returns the specific network strategy for the +// provided type. +func getStrategy(tpe string) (networkStrategy, error) { + s, exists := strategies[tpe] + if !exists { + return nil, fmt.Errorf("unknown strategy type %q", tpe) + } + return s, nil +} + +// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. +func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) { + out := &NetworkInterface{Name: interfaceName} + // This can happen if the network runtime information is missing - possible if the + // container was created by an old version of libcontainer. + if interfaceName == "" { + return out, nil + } + type netStatsPair struct { + // Where to write the output. + Out *uint64 + // The network stats file to read. + File string + } + // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. + netStats := []netStatsPair{ + {Out: &out.RxBytes, File: "tx_bytes"}, + {Out: &out.RxPackets, File: "tx_packets"}, + {Out: &out.RxErrors, File: "tx_errors"}, + {Out: &out.RxDropped, File: "tx_dropped"}, + + {Out: &out.TxBytes, File: "rx_bytes"}, + {Out: &out.TxPackets, File: "rx_packets"}, + {Out: &out.TxErrors, File: "rx_errors"}, + {Out: &out.TxDropped, File: "rx_dropped"}, + } + for _, netStat := range netStats { + data, err := readSysfsNetworkStats(interfaceName, netStat.File) + if err != nil { + return nil, err + } + *(netStat.Out) = data + } + return out, nil +} + +// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics +func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { + data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)) + if err != nil { + return 0, err + } + return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) +} + +// loopback is a network strategy that provides a basic loopback device +type loopback struct { +} + +func (l *loopback) create(n *network, nspid int) error { + return nil +} + +func (l *loopback) initialize(config *network) error { + return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}}) +} + +func (l *loopback) attach(n *configs.Network) (err error) { + return nil +} + +func (l *loopback) detach(n *configs.Network) (err error) { + return nil +} + +// veth is a network strategy that uses a bridge and creates +// a veth pair, one that is attached to the bridge on the host and the other +// is placed inside the container's namespace +type veth struct { +} + +func (v *veth) detach(n *configs.Network) (err error) { + return netlink.LinkSetMaster(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil) +} + +// attach a container network interface to an external network +func (v *veth) attach(n *configs.Network) (err error) { + brl, err := netlink.LinkByName(n.Bridge) + if err != nil { + return err + } + br, ok := brl.(*netlink.Bridge) + if !ok { + return fmt.Errorf("Wrong device type %T", brl) + } + host, err := netlink.LinkByName(n.HostInterfaceName) + if err != nil { + return err + } + + if err := netlink.LinkSetMaster(host, br); err != nil { + return err + } + if err := netlink.LinkSetMTU(host, n.Mtu); err != nil { + return err + } + if n.HairpinMode { + if err := netlink.LinkSetHairpin(host, true); err != nil { + return err + } + } + if err := netlink.LinkSetUp(host); err != nil { + return err + } + + return nil +} + +func (v *veth) create(n *network, nspid int) (err error) { + tmpName, err := v.generateTempPeerName() + if err != nil { + return err + } + n.TempVethPeerName = tmpName + if n.Bridge == "" { + return fmt.Errorf("bridge is not specified") + } + veth := &netlink.Veth{ + LinkAttrs: netlink.LinkAttrs{ + Name: n.HostInterfaceName, + TxQLen: n.TxQueueLen, + }, + PeerName: n.TempVethPeerName, + } + if err := netlink.LinkAdd(veth); err != nil { + return err + } + defer func() { + if err != nil { + netlink.LinkDel(veth) + } + }() + if err := v.attach(&n.Network); err != nil { + return err + } + child, err := netlink.LinkByName(n.TempVethPeerName) + if err != nil { + return err + } + return netlink.LinkSetNsPid(child, nspid) +} + +func (v *veth) generateTempPeerName() (string, error) { + return utils.GenerateRandomName("veth", 7) +} + +func (v *veth) initialize(config *network) error { + peer := config.TempVethPeerName + if peer == "" { + return fmt.Errorf("peer is not specified") + } + child, err := netlink.LinkByName(peer) + if err != nil { + return err + } + if err := netlink.LinkSetDown(child); err != nil { + return err + } + if err := netlink.LinkSetName(child, config.Name); err != nil { + return err + } + // get the interface again after we changed the name as the index also changes. + if child, err = netlink.LinkByName(config.Name); err != nil { + return err + } + if config.MacAddress != "" { + mac, err := net.ParseMAC(config.MacAddress) + if err != nil { + return err + } + if err := netlink.LinkSetHardwareAddr(child, mac); err != nil { + return err + } + } + ip, err := netlink.ParseAddr(config.Address) + if err != nil { + return err + } + if err := netlink.AddrAdd(child, ip); err != nil { + return err + } + if config.IPv6Address != "" { + ip6, err := netlink.ParseAddr(config.IPv6Address) + if err != nil { + return err + } + if err := netlink.AddrAdd(child, ip6); err != nil { + return err + } + } + if err := netlink.LinkSetMTU(child, config.Mtu); err != nil { + return err + } + if err := netlink.LinkSetUp(child); err != nil { + return err + } + if config.Gateway != "" { + gw := net.ParseIP(config.Gateway) + if err := netlink.RouteAdd(&netlink.Route{ + Scope: netlink.SCOPE_UNIVERSE, + LinkIndex: child.Attrs().Index, + Gw: gw, + }); err != nil { + return err + } + } + if config.IPv6Gateway != "" { + gw := net.ParseIP(config.IPv6Gateway) + if err := netlink.RouteAdd(&netlink.Route{ + Scope: netlink.SCOPE_UNIVERSE, + LinkIndex: child.Attrs().Index, + Gw: gw, + }); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go new file mode 100644 index 000000000..81587b9b1 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go @@ -0,0 +1,90 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "golang.org/x/sys/unix" +) + +const oomCgroupName = "memory" + +type PressureLevel uint + +const ( + LowPressure PressureLevel = iota + MediumPressure + CriticalPressure +) + +func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) { + evFile, err := os.Open(filepath.Join(cgDir, evName)) + if err != nil { + return nil, err + } + fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC) + if err != nil { + evFile.Close() + return nil, err + } + + eventfd := os.NewFile(uintptr(fd), "eventfd") + + eventControlPath := filepath.Join(cgDir, "cgroup.event_control") + data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg) + if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil { + eventfd.Close() + evFile.Close() + return nil, err + } + ch := make(chan struct{}) + go func() { + defer func() { + close(ch) + eventfd.Close() + evFile.Close() + }() + buf := make([]byte, 8) + for { + if _, err := eventfd.Read(buf); err != nil { + return + } + // When a cgroup is destroyed, an event is sent to eventfd. + // So if the control path is gone, return instead of notifying. + if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { + return + } + ch <- struct{}{} + } + }() + return ch, nil +} + +// notifyOnOOM returns channel on which you can expect event about OOM, +// if process died without OOM this channel will be closed. +func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) { + dir := paths[oomCgroupName] + if dir == "" { + return nil, fmt.Errorf("path %q missing", oomCgroupName) + } + + return registerMemoryEvent(dir, "memory.oom_control", "") +} + +func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) { + dir := paths[oomCgroupName] + if dir == "" { + return nil, fmt.Errorf("path %q missing", oomCgroupName) + } + + if level > CriticalPressure { + return nil, fmt.Errorf("invalid pressure level %d", level) + } + + levelStr := []string{"low", "medium", "critical"}[level] + return registerMemoryEvent(dir, "memory.pressure_level", levelStr) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md new file mode 100644 index 000000000..575701371 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md @@ -0,0 +1,44 @@ +## nsenter + +The `nsenter` package registers a special init constructor that is called before +the Go runtime has a chance to boot. This provides us the ability to `setns` on +existing namespaces and avoid the issues that the Go runtime has with multiple +threads. This constructor will be called if this package is registered, +imported, in your go application. + +The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd/cgo/) +package. In cgo, if the import of "C" is immediately preceded by a comment, that comment, +called the preamble, is used as a header when compiling the C parts of the package. +So every time we import package `nsenter`, the C code function `nsexec()` would be +called. And package `nsenter` is now only imported in `main_unix.go`, so every time +before we call `cmd.Start` on linux, that C code would run. + +Because `nsexec()` must be run before the Go runtime in order to use the +Linux kernel namespace, you must `import` this library into a package if +you plan to use `libcontainer` directly. Otherwise Go will not execute +the `nsexec()` constructor, which means that the re-exec will not cause +the namespaces to be joined. You can import it like this: + +```go +import _ "github.com/opencontainers/runc/libcontainer/nsenter" +``` + +`nsexec()` will first get the file descriptor number for the init pipe +from the environment variable `_LIBCONTAINER_INITPIPE` (which was opened +by the parent and kept open across the fork-exec of the `nsexec()` init +process). The init pipe is used to read bootstrap data (namespace paths, +clone flags, uid and gid mappings, and the console path) from the parent +process. `nsexec()` will then call `setns(2)` to join the namespaces +provided in the bootstrap data (if available), `clone(2)` a child process +with the provided clone flags, update the user and group ID mappings, do +some further miscellaneous setup steps, and then send the PID of the +child process to the parent of the `nsexec()` "caller". Finally, +the parent `nsexec()` will exit and the child `nsexec()` process will +return to allow the Go runtime take over. + +NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any +CLONE_NEW* clone flags because we must fork a new process in order to +enter the PID namespace. + + + diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h new file mode 100644 index 000000000..9e9bdca05 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h @@ -0,0 +1,32 @@ +#ifndef NSENTER_NAMESPACE_H +#define NSENTER_NAMESPACE_H + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif +#include <sched.h> + +/* All of these are taken from include/uapi/linux/sched.h */ +#ifndef CLONE_NEWNS +# define CLONE_NEWNS 0x00020000 /* New mount namespace group */ +#endif +#ifndef CLONE_NEWCGROUP +# define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ +#endif +#ifndef CLONE_NEWUTS +# define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ +#endif +#ifndef CLONE_NEWIPC +# define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ +#endif +#ifndef CLONE_NEWUSER +# define CLONE_NEWUSER 0x10000000 /* New user namespace */ +#endif +#ifndef CLONE_NEWPID +# define CLONE_NEWPID 0x20000000 /* New pid namespace */ +#endif +#ifndef CLONE_NEWNET +# define CLONE_NEWNET 0x40000000 /* New network namespace */ +#endif + +#endif /* NSENTER_NAMESPACE_H */ diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go new file mode 100644 index 000000000..07f4d63e4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go @@ -0,0 +1,12 @@ +// +build linux,!gccgo + +package nsenter + +/* +#cgo CFLAGS: -Wall +extern void nsexec(); +void __attribute__((constructor)) init(void) { + nsexec(); +} +*/ +import "C" diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go new file mode 100644 index 000000000..63c7a3ec2 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go @@ -0,0 +1,25 @@ +// +build linux,gccgo + +package nsenter + +/* +#cgo CFLAGS: -Wall +extern void nsexec(); +void __attribute__((constructor)) init(void) { + nsexec(); +} +*/ +import "C" + +// AlwaysFalse is here to stay false +// (and be exported so the compiler doesn't optimize out its reference) +var AlwaysFalse bool + +func init() { + if AlwaysFalse { + // by referencing this C init() in a noop test, it will ensure the compiler + // links in the C function. + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134 + C.init() + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go new file mode 100644 index 000000000..ac701ca39 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go @@ -0,0 +1,5 @@ +// +build !linux !cgo + +package nsenter + +import "C" diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c new file mode 100644 index 000000000..197e6d08e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -0,0 +1,862 @@ +#define _GNU_SOURCE +#include <endian.h> +#include <errno.h> +#include <fcntl.h> +#include <grp.h> +#include <sched.h> +#include <setjmp.h> +#include <signal.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> + +#include <sys/ioctl.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include <linux/limits.h> +#include <linux/netlink.h> +#include <linux/types.h> + +/* Get all of the CLONE_NEW* flags. */ +#include "namespace.h" + +/* Synchronisation values. */ +enum sync_t { + SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */ + SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */ + SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */ + SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */ + SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */ + SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */ + + /* XXX: This doesn't help with segfaults and other such issues. */ + SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ +}; + +/* longjmp() arguments. */ +#define JUMP_PARENT 0x00 +#define JUMP_CHILD 0xA0 +#define JUMP_INIT 0xA1 + +/* JSON buffer. */ +#define JSON_MAX 4096 + +/* Assume the stack grows down, so arguments should be above it. */ +struct clone_t { + /* + * Reserve some space for clone() to locate arguments + * and retcode in this place + */ + char stack[4096] __attribute__ ((aligned(16))); + char stack_ptr[0]; + + /* There's two children. This is used to execute the different code. */ + jmp_buf *env; + int jmpval; +}; + +struct nlconfig_t { + char *data; + uint32_t cloneflags; + char *uidmap; + size_t uidmap_len; + char *gidmap; + size_t gidmap_len; + char *namespaces; + size_t namespaces_len; + uint8_t is_setgroup; + uint8_t is_rootless; + char *oom_score_adj; + size_t oom_score_adj_len; +}; + +/* + * List of netlink message types sent to us as part of bootstrapping the init. + * These constants are defined in libcontainer/message_linux.go. + */ +#define INIT_MSG 62000 +#define CLONE_FLAGS_ATTR 27281 +#define NS_PATHS_ATTR 27282 +#define UIDMAP_ATTR 27283 +#define GIDMAP_ATTR 27284 +#define SETGROUP_ATTR 27285 +#define OOM_SCORE_ADJ_ATTR 27286 +#define ROOTLESS_ATTR 27287 + +/* + * Use the raw syscall for versions of glibc which don't include a function for + * it, namely (glibc 2.12). + */ +#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 +# define _GNU_SOURCE +# include "syscall.h" +# if !defined(SYS_setns) && defined(__NR_setns) +# define SYS_setns __NR_setns +# endif + +#ifndef SYS_setns +# error "setns(2) syscall not supported by glibc version" +#endif + +int setns(int fd, int nstype) +{ + return syscall(SYS_setns, fd, nstype); +} +#endif + +/* XXX: This is ugly. */ +static int syncfd = -1; + +/* TODO(cyphar): Fix this so it correctly deals with syncT. */ +#define bail(fmt, ...) \ + do { \ + int ret = __COUNTER__ + 1; \ + fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \ + if (syncfd >= 0) { \ + enum sync_t s = SYNC_ERR; \ + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) \ + fprintf(stderr, "nsenter: failed: write(s)"); \ + if (write(syncfd, &ret, sizeof(ret)) != sizeof(ret)) \ + fprintf(stderr, "nsenter: failed: write(ret)"); \ + } \ + exit(ret); \ + } while(0) + +static int write_file(char *data, size_t data_len, char *pathfmt, ...) +{ + int fd, len, ret = 0; + char path[PATH_MAX]; + + va_list ap; + va_start(ap, pathfmt); + len = vsnprintf(path, PATH_MAX, pathfmt, ap); + va_end(ap); + if (len < 0) + return -1; + + fd = open(path, O_RDWR); + if (fd < 0) { + return -1; + } + + len = write(fd, data, data_len); + if (len != data_len) { + ret = -1; + goto out; + } + +out: + close(fd); + return ret; +} + +enum policy_t { + SETGROUPS_DEFAULT = 0, + SETGROUPS_ALLOW, + SETGROUPS_DENY, +}; + +/* This *must* be called before we touch gid_map. */ +static void update_setgroups(int pid, enum policy_t setgroup) +{ + char *policy; + + switch (setgroup) { + case SETGROUPS_ALLOW: + policy = "allow"; + break; + case SETGROUPS_DENY: + policy = "deny"; + break; + case SETGROUPS_DEFAULT: + default: + /* Nothing to do. */ + return; + } + + if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) { + /* + * If the kernel is too old to support /proc/pid/setgroups, + * open(2) or write(2) will return ENOENT. This is fine. + */ + if (errno != ENOENT) + bail("failed to write '%s' to /proc/%d/setgroups", policy, pid); + } +} + +static void update_uidmap(int pid, char *map, size_t map_len) +{ + if (map == NULL || map_len <= 0) + return; + + if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) + bail("failed to update /proc/%d/uid_map", pid); +} + +static void update_gidmap(int pid, char *map, size_t map_len) +{ + if (map == NULL || map_len <= 0) + return; + + if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) + bail("failed to update /proc/%d/gid_map", pid); +} + +static void update_oom_score_adj(char *data, size_t len) +{ + if (data == NULL || len <= 0) + return; + + if (write_file(data, len, "/proc/self/oom_score_adj") < 0) + bail("failed to update /proc/self/oom_score_adj"); +} + +/* A dummy function that just jumps to the given jumpval. */ +static int child_func(void *arg) __attribute__ ((noinline)); +static int child_func(void *arg) +{ + struct clone_t *ca = (struct clone_t *)arg; + longjmp(*ca->env, ca->jmpval); +} + +static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline)); +static int clone_parent(jmp_buf *env, int jmpval) +{ + struct clone_t ca = { + .env = env, + .jmpval = jmpval, + }; + + return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca); +} + +/* + * Gets the init pipe fd from the environment, which is used to read the + * bootstrap data and tell the parent what the new pid is after we finish + * setting up the environment. + */ +static int initpipe(void) +{ + int pipenum; + char *initpipe, *endptr; + + initpipe = getenv("_LIBCONTAINER_INITPIPE"); + if (initpipe == NULL || *initpipe == '\0') + return -1; + + pipenum = strtol(initpipe, &endptr, 10); + if (*endptr != '\0') + bail("unable to parse _LIBCONTAINER_INITPIPE"); + + return pipenum; +} + +/* Returns the clone(2) flag for a namespace, given the name of a namespace. */ +static int nsflag(char *name) +{ + if (!strcmp(name, "cgroup")) + return CLONE_NEWCGROUP; + else if (!strcmp(name, "ipc")) + return CLONE_NEWIPC; + else if (!strcmp(name, "mnt")) + return CLONE_NEWNS; + else if (!strcmp(name, "net")) + return CLONE_NEWNET; + else if (!strcmp(name, "pid")) + return CLONE_NEWPID; + else if (!strcmp(name, "user")) + return CLONE_NEWUSER; + else if (!strcmp(name, "uts")) + return CLONE_NEWUTS; + + /* If we don't recognise a name, fallback to 0. */ + return 0; +} + +static uint32_t readint32(char *buf) +{ + return *(uint32_t *) buf; +} + +static uint8_t readint8(char *buf) +{ + return *(uint8_t *) buf; +} + +static void nl_parse(int fd, struct nlconfig_t *config) +{ + size_t len, size; + struct nlmsghdr hdr; + char *data, *current; + + /* Retrieve the netlink header. */ + len = read(fd, &hdr, NLMSG_HDRLEN); + if (len != NLMSG_HDRLEN) + bail("invalid netlink header length %zu", len); + + if (hdr.nlmsg_type == NLMSG_ERROR) + bail("failed to read netlink message"); + + if (hdr.nlmsg_type != INIT_MSG) + bail("unexpected msg type %d", hdr.nlmsg_type); + + /* Retrieve data. */ + size = NLMSG_PAYLOAD(&hdr, 0); + current = data = malloc(size); + if (!data) + bail("failed to allocate %zu bytes of memory for nl_payload", size); + + len = read(fd, data, size); + if (len != size) + bail("failed to read netlink payload, %zu != %zu", len, size); + + /* Parse the netlink payload. */ + config->data = data; + while (current < data + size) { + struct nlattr *nlattr = (struct nlattr *)current; + size_t payload_len = nlattr->nla_len - NLA_HDRLEN; + + /* Advance to payload. */ + current += NLA_HDRLEN; + + /* Handle payload. */ + switch (nlattr->nla_type) { + case CLONE_FLAGS_ATTR: + config->cloneflags = readint32(current); + break; + case ROOTLESS_ATTR: + config->is_rootless = readint8(current); + break; + case OOM_SCORE_ADJ_ATTR: + config->oom_score_adj = current; + config->oom_score_adj_len = payload_len; + break; + case NS_PATHS_ATTR: + config->namespaces = current; + config->namespaces_len = payload_len; + break; + case UIDMAP_ATTR: + config->uidmap = current; + config->uidmap_len = payload_len; + break; + case GIDMAP_ATTR: + config->gidmap = current; + config->gidmap_len = payload_len; + break; + case SETGROUP_ATTR: + config->is_setgroup = readint8(current); + break; + default: + bail("unknown netlink message type %d", nlattr->nla_type); + } + + current += NLA_ALIGN(payload_len); + } +} + +void nl_free(struct nlconfig_t *config) +{ + free(config->data); +} + +void join_namespaces(char *nslist) +{ + int num = 0, i; + char *saveptr = NULL; + char *namespace = strtok_r(nslist, ",", &saveptr); + struct namespace_t { + int fd; + int ns; + char type[PATH_MAX]; + char path[PATH_MAX]; + } *namespaces = NULL; + + if (!namespace || !strlen(namespace) || !strlen(nslist)) + bail("ns paths are empty"); + + /* + * We have to open the file descriptors first, since after + * we join the mnt namespace we might no longer be able to + * access the paths. + */ + do { + int fd; + char *path; + struct namespace_t *ns; + + /* Resize the namespace array. */ + namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t)); + if (!namespaces) + bail("failed to reallocate namespace array"); + ns = &namespaces[num - 1]; + + /* Split 'ns:path'. */ + path = strstr(namespace, ":"); + if (!path) + bail("failed to parse %s", namespace); + *path++ = '\0'; + + fd = open(path, O_RDONLY); + if (fd < 0) + bail("failed to open %s", path); + + ns->fd = fd; + ns->ns = nsflag(namespace); + strncpy(ns->path, path, PATH_MAX); + } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); + + /* + * The ordering in which we join namespaces is important. We should + * always join the user namespace *first*. This is all guaranteed + * from the container_linux.go side of this, so we're just going to + * follow the order given to us. + */ + + for (i = 0; i < num; i++) { + struct namespace_t ns = namespaces[i]; + + if (setns(ns.fd, ns.ns) < 0) + bail("failed to setns to %s", ns.path); + + close(ns.fd); + } + + free(namespaces); +} + +void nsexec(void) +{ + int pipenum; + jmp_buf env; + int sync_child_pipe[2], sync_grandchild_pipe[2]; + struct nlconfig_t config = {0}; + + /* + * If we don't have an init pipe, just return to the go routine. + * We'll only get an init pipe for start or exec. + */ + pipenum = initpipe(); + if (pipenum == -1) + return; + + /* Parse all of the netlink configuration. */ + nl_parse(pipenum, &config); + + /* Set oom_score_adj. This has to be done before !dumpable because + * /proc/self/oom_score_adj is not writeable unless you're an privileged + * user (if !dumpable is set). All children inherit their parent's + * oom_score_adj value on fork(2) so this will always be propagated + * properly. + */ + update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len); + + /* + * Make the process non-dumpable, to avoid various race conditions that + * could cause processes in namespaces we're joining to access host + * resources (or potentially execute code). + * + * However, if the number of namespaces we are joining is 0, we are not + * going to be switching to a different security context. Thus setting + * ourselves to be non-dumpable only breaks things (like rootless + * containers), which is the recommendation from the kernel folks. + */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + bail("failed to set process as non-dumpable"); + } + + /* Pipe so we can tell the child when we've finished setting up. */ + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0) + bail("failed to setup sync pipe between parent and child"); + + /* + * We need a new socketpair to sync with grandchild so we don't have + * race condition with child. + */ + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0) + bail("failed to setup sync pipe between parent and grandchild"); + + /* TODO: Currently we aren't dealing with child deaths properly. */ + + /* + * Okay, so this is quite annoying. + * + * In order for this unsharing code to be more extensible we need to split + * up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case + * would be if we did clone(CLONE_NEWUSER) and the other namespaces + * separately, but because of SELinux issues we cannot really do that. But + * we cannot just dump the namespace flags into clone(...) because several + * usecases (such as rootless containers) require more granularity around + * the namespace setup. In addition, some older kernels had issues where + * CLONE_NEWUSER wasn't handled before other namespaces (but we cannot + * handle this while also dealing with SELinux so we choose SELinux support + * over broken kernel support). + * + * However, if we unshare(2) the user namespace *before* we clone(2), then + * all hell breaks loose. + * + * The parent no longer has permissions to do many things (unshare(2) drops + * all capabilities in your old namespace), and the container cannot be set + * up to have more than one {uid,gid} mapping. This is obviously less than + * ideal. In order to fix this, we have to first clone(2) and then unshare. + * + * Unfortunately, it's not as simple as that. We have to fork to enter the + * PID namespace (the PID namespace only applies to children). Since we'll + * have to double-fork, this clone_parent() call won't be able to get the + * PID of the _actual_ init process (without doing more synchronisation than + * I can deal with at the moment). So we'll just get the parent to send it + * for us, the only job of this process is to update + * /proc/pid/{setgroups,uid_map,gid_map}. + * + * And as a result of the above, we also need to setns(2) in the first child + * because if we join a PID namespace in the topmost parent then our child + * will be in that namespace (and it will not be able to give us a PID value + * that makes sense without resorting to sending things with cmsg). + * + * This also deals with an older issue caused by dumping cloneflags into + * clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so + * we have to unshare(2) before clone(2) in order to do this. This was fixed + * in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was + * introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're + * aware, the last mainline kernel which had this bug was Linux 3.12. + * However, we cannot comment on which kernels the broken patch was + * backported to. + * + * -- Aleksa "what has my life come to?" Sarai + */ + + switch (setjmp(env)) { + /* + * Stage 0: We're in the parent. Our job is just to create a new child + * (stage 1: JUMP_CHILD) process and write its uid_map and + * gid_map. That process will go on to create a new process, then + * it will send us its PID which we will send to the bootstrap + * process. + */ + case JUMP_PARENT: { + int len; + pid_t child; + char buf[JSON_MAX]; + bool ready = false; + + /* For debugging. */ + prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0); + + /* Start the process of getting a container. */ + child = clone_parent(&env, JUMP_CHILD); + if (child < 0) + bail("unable to fork: child_func"); + + /* + * State machine for synchronisation with the children. + * + * Father only return when both child and grandchild are + * ready, so we can receive all possible error codes + * generated by children. + */ + while (!ready) { + enum sync_t s; + int ret; + + syncfd = sync_child_pipe[1]; + close(sync_child_pipe[0]); + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with child: next state"); + + switch (s) { + case SYNC_ERR: + /* We have to mirror the error code of the child. */ + if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) + bail("failed to sync with child: read(error code)"); + + exit(ret); + case SYNC_USERMAP_PLS: + /* + * Enable setgroups(2) if we've been asked to. But we also + * have to explicitly disable setgroups(2) if we're + * creating a rootless container (this is required since + * Linux 3.19). + */ + if (config.is_rootless && config.is_setgroup) { + kill(child, SIGKILL); + bail("cannot allow setgroup in an unprivileged user namespace setup"); + } + + if (config.is_setgroup) + update_setgroups(child, SETGROUPS_ALLOW); + if (config.is_rootless) + update_setgroups(child, SETGROUPS_DENY); + + /* Set up mappings. */ + update_uidmap(child, config.uidmap, config.uidmap_len); + update_gidmap(child, config.gidmap, config.gidmap_len); + + s = SYNC_USERMAP_ACK; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); + } + break; + case SYNC_RECVPID_PLS: { + pid_t old = child; + + /* Get the init_func pid. */ + if (read(syncfd, &child, sizeof(child)) != sizeof(child)) { + kill(old, SIGKILL); + bail("failed to sync with child: read(childpid)"); + } + + /* Send ACK. */ + s = SYNC_RECVPID_ACK; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(old, SIGKILL); + kill(child, SIGKILL); + bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); + } + } + break; + case SYNC_CHILD_READY: + ready = true; + break; + default: + bail("unexpected sync value: %u", s); + } + } + + /* Now sync with grandchild. */ + + ready = false; + while (!ready) { + enum sync_t s; + int ret; + + syncfd = sync_grandchild_pipe[1]; + close(sync_grandchild_pipe[0]); + + s = SYNC_GRANDCHILD; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with child: write(SYNC_GRANDCHILD)"); + } + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with child: next state"); + + switch (s) { + case SYNC_ERR: + /* We have to mirror the error code of the child. */ + if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) + bail("failed to sync with child: read(error code)"); + + exit(ret); + case SYNC_CHILD_READY: + ready = true; + break; + default: + bail("unexpected sync value: %u", s); + } + } + + /* Send the init_func pid back to our parent. */ + len = snprintf(buf, JSON_MAX, "{\"pid\": %d}\n", child); + if (len < 0) { + kill(child, SIGKILL); + bail("unable to generate JSON for child pid"); + } + if (write(pipenum, buf, len) != len) { + kill(child, SIGKILL); + bail("unable to send child pid to bootstrapper"); + } + + exit(0); + } + + /* + * Stage 1: We're in the first child process. Our job is to join any + * provided namespaces in the netlink payload and unshare all + * of the requested namespaces. If we've been asked to + * CLONE_NEWUSER, we will ask our parent (stage 0) to set up + * our user mappings for us. Then, we create a new child + * (stage 2: JUMP_INIT) for PID namespace. We then send the + * child's PID to our parent (stage 0). + */ + case JUMP_CHILD: { + pid_t child; + enum sync_t s; + + /* We're in a child and thus need to tell the parent if we die. */ + syncfd = sync_child_pipe[0]; + close(sync_child_pipe[1]); + + /* For debugging. */ + prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0); + + /* + * We need to setns first. We cannot do this earlier (in stage 0) + * because of the fact that we forked to get here (the PID of + * [stage 2: JUMP_INIT]) would be meaningless). We could send it + * using cmsg(3) but that's just annoying. + */ + if (config.namespaces) + join_namespaces(config.namespaces); + + /* + * Unshare all of the namespaces. Now, it should be noted that this + * ordering might break in the future (especially with rootless + * containers). But for now, it's not possible to split this into + * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. + * + * Note that we don't merge this with clone() because there were + * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) + * was broken, so we'll just do it the long way anyway. + */ + if (unshare(config.cloneflags) < 0) + bail("failed to unshare namespaces"); + + /* + * Deal with user namespaces first. They are quite special, as they + * affect our ability to unshare other namespaces and are used as + * context for privilege checks. + */ + if (config.cloneflags & CLONE_NEWUSER) { + /* + * We don't have the privileges to do any mapping here (see the + * clone_parent rant). So signal our parent to hook us up. + */ + + /* Switching is only necessary if we joined namespaces. */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) + bail("failed to set process as dumpable"); + } + s = SYNC_USERMAP_PLS; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); + + /* ... wait for mapping ... */ + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); + if (s != SYNC_USERMAP_ACK) + bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); + /* Switching is only necessary if we joined namespaces. */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + bail("failed to set process as dumpable"); + } + } + + /* + * TODO: What about non-namespace clone flags that we're dropping here? + * + * We fork again because of PID namespace, setns(2) or unshare(2) don't + * change the PID namespace of the calling process, because doing so + * would change the caller's idea of its own PID (as reported by getpid()), + * which would break many applications and libraries, so we must fork + * to actually enter the new PID namespace. + */ + child = clone_parent(&env, JUMP_INIT); + if (child < 0) + bail("unable to fork: init_func"); + + /* Send the child to our parent, which knows what it's doing. */ + s = SYNC_RECVPID_PLS; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with parent: write(SYNC_RECVPID_PLS)"); + } + if (write(syncfd, &child, sizeof(child)) != sizeof(child)) { + kill(child, SIGKILL); + bail("failed to sync with parent: write(childpid)"); + } + + /* ... wait for parent to get the pid ... */ + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with parent: read(SYNC_RECVPID_ACK)"); + } + if (s != SYNC_RECVPID_ACK) { + kill(child, SIGKILL); + bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); + } + + s = SYNC_CHILD_READY; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with parent: write(SYNC_CHILD_READY)"); + } + + /* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */ + exit(0); + } + + /* + * Stage 2: We're the final child process, and the only process that will + * actually return to the Go runtime. Our job is to just do the + * final cleanup steps and then return to the Go runtime to allow + * init_linux.go to run. + */ + case JUMP_INIT: { + /* + * We're inside the child now, having jumped from the + * start_child() code after forking in the parent. + */ + enum sync_t s; + + /* We're in a child and thus need to tell the parent if we die. */ + syncfd = sync_grandchild_pipe[0]; + close(sync_grandchild_pipe[1]); + close(sync_child_pipe[0]); + close(sync_child_pipe[1]); + + /* For debugging. */ + prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0); + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with parent: read(SYNC_GRANDCHILD)"); + if (s != SYNC_GRANDCHILD) + bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s); + + if (setsid() < 0) + bail("setsid failed"); + + if (setuid(0) < 0) + bail("setuid failed"); + + if (setgid(0) < 0) + bail("setgid failed"); + + if (!config.is_rootless && config.is_setgroup) { + if (setgroups(0, NULL) < 0) + bail("setgroups failed"); + } + + s = SYNC_CHILD_READY; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with patent: write(SYNC_CHILD_READY)"); + + /* Close sync pipes. */ + close(sync_grandchild_pipe[0]); + + /* Free netlink data. */ + nl_free(&config); + + /* Finish executing, let the Go runtime take over. */ + return; + } + default: + bail("unexpected jump value"); + } + + /* Should never be reached. */ + bail("should never be reached"); +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/process.go b/vendor/github.com/opencontainers/runc/libcontainer/process.go new file mode 100644 index 000000000..f1ad08149 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/process.go @@ -0,0 +1,106 @@ +package libcontainer + +import ( + "fmt" + "io" + "math" + "os" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type processOperations interface { + wait() (*os.ProcessState, error) + signal(sig os.Signal) error + pid() int +} + +// Process specifies the configuration and IO for a process inside +// a container. +type Process struct { + // The command to be run followed by any arguments. + Args []string + + // Env specifies the environment variables for the process. + Env []string + + // User will set the uid and gid of the executing process running inside the container + // local to the container's user and group configuration. + User string + + // AdditionalGroups specifies the gids that should be added to supplementary groups + // in addition to those that the user belongs to. + AdditionalGroups []string + + // Cwd will change the processes current working directory inside the container's rootfs. + Cwd string + + // Stdin is a pointer to a reader which provides the standard input stream. + Stdin io.Reader + + // Stdout is a pointer to a writer which receives the standard output stream. + Stdout io.Writer + + // Stderr is a pointer to a writer which receives the standard error stream. + Stderr io.Writer + + // ExtraFiles specifies additional open files to be inherited by the container + ExtraFiles []*os.File + + // Capabilities specify the capabilities to keep when executing the process inside the container + // All capabilities not specified will be dropped from the processes capability mask + Capabilities *configs.Capabilities + + // AppArmorProfile specifies the profile to apply to the process and is + // changed at the time the process is execed + AppArmorProfile string + + // Label specifies the label to apply to the process. It is commonly used by selinux + Label string + + // NoNewPrivileges controls whether processes can gain additional privileges. + NoNewPrivileges *bool + + // Rlimits specifies the resource limits, such as max open files, to set in the container + // If Rlimits are not set, the container will inherit rlimits from the parent process + Rlimits []configs.Rlimit + + // ConsoleSocket provides the masterfd console. + ConsoleSocket *os.File + + ops processOperations +} + +// Wait waits for the process to exit. +// Wait releases any resources associated with the Process +func (p Process) Wait() (*os.ProcessState, error) { + if p.ops == nil { + return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps) + } + return p.ops.wait() +} + +// Pid returns the process ID +func (p Process) Pid() (int, error) { + // math.MinInt32 is returned here, because it's invalid value + // for the kill() system call. + if p.ops == nil { + return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps) + } + return p.ops.pid(), nil +} + +// Signal sends a signal to the Process. +func (p Process) Signal(sig os.Signal) error { + if p.ops == nil { + return newGenericError(fmt.Errorf("invalid process"), NoProcessOps) + } + return p.ops.signal(sig) +} + +// IO holds the process's STDIO +type IO struct { + Stdin io.WriteCloser + Stdout io.ReadCloser + Stderr io.ReadCloser +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go new file mode 100644 index 000000000..171685ccd --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go @@ -0,0 +1,493 @@ +// +build linux + +package libcontainer + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strconv" + "syscall" // only for Signal + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" + + "golang.org/x/sys/unix" +) + +type parentProcess interface { + // pid returns the pid for the running process. + pid() int + + // start starts the process execution. + start() error + + // send a SIGKILL to the process and wait for the exit. + terminate() error + + // wait waits on the process returning the process state. + wait() (*os.ProcessState, error) + + // startTime returns the process start time. + startTime() (uint64, error) + + signal(os.Signal) error + + externalDescriptors() []string + + setExternalDescriptors(fds []string) +} + +type setnsProcess struct { + cmd *exec.Cmd + parentPipe *os.File + childPipe *os.File + cgroupPaths map[string]string + config *initConfig + fds []string + process *Process + bootstrapData io.Reader +} + +func (p *setnsProcess) startTime() (uint64, error) { + stat, err := system.Stat(p.pid()) + return stat.StartTime, err +} + +func (p *setnsProcess) signal(sig os.Signal) error { + s, ok := sig.(syscall.Signal) + if !ok { + return errors.New("os: unsupported signal type") + } + return unix.Kill(p.pid(), s) +} + +func (p *setnsProcess) start() (err error) { + defer p.parentPipe.Close() + err = p.cmd.Start() + p.childPipe.Close() + if err != nil { + return newSystemErrorWithCause(err, "starting setns process") + } + if p.bootstrapData != nil { + if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { + return newSystemErrorWithCause(err, "copying bootstrap data to pipe") + } + } + if err = p.execSetns(); err != nil { + return newSystemErrorWithCause(err, "executing setns process") + } + // We can't join cgroups if we're in a rootless container. + if !p.config.Rootless && len(p.cgroupPaths) > 0 { + if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { + return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) + } + } + // set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return newSystemErrorWithCause(err, "setting rlimits for process") + } + if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { + return newSystemErrorWithCause(err, "writing config to pipe") + } + + ierr := parseSync(p.parentPipe, func(sync *syncT) error { + switch sync.Type { + case procReady: + // This shouldn't happen. + panic("unexpected procReady in setns") + case procHooks: + // This shouldn't happen. + panic("unexpected procHooks in setns") + default: + return newSystemError(fmt.Errorf("invalid JSON payload from child")) + } + }) + + if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil { + return newSystemErrorWithCause(err, "calling shutdown on init pipe") + } + // Must be done after Shutdown so the child will exit and we can wait for it. + if ierr != nil { + p.wait() + return ierr + } + return nil +} + +// execSetns runs the process that executes C code to perform the setns calls +// because setns support requires the C process to fork off a child and perform the setns +// before the go runtime boots, we wait on the process to die and receive the child's pid +// over the provided pipe. +func (p *setnsProcess) execSetns() error { + status, err := p.cmd.Process.Wait() + if err != nil { + p.cmd.Wait() + return newSystemErrorWithCause(err, "waiting on setns process to finish") + } + if !status.Success() { + p.cmd.Wait() + return newSystemError(&exec.ExitError{ProcessState: status}) + } + var pid *pid + if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { + p.cmd.Wait() + return newSystemErrorWithCause(err, "reading pid from init pipe") + } + process, err := os.FindProcess(pid.Pid) + if err != nil { + return err + } + p.cmd.Process = process + p.process.ops = p + return nil +} + +// terminate sends a SIGKILL to the forked process for the setns routine then waits to +// avoid the process becoming a zombie. +func (p *setnsProcess) terminate() error { + if p.cmd.Process == nil { + return nil + } + err := p.cmd.Process.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *setnsProcess) wait() (*os.ProcessState, error) { + err := p.cmd.Wait() + + // Return actual ProcessState even on Wait error + return p.cmd.ProcessState, err +} + +func (p *setnsProcess) pid() int { + return p.cmd.Process.Pid +} + +func (p *setnsProcess) externalDescriptors() []string { + return p.fds +} + +func (p *setnsProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +type initProcess struct { + cmd *exec.Cmd + parentPipe *os.File + childPipe *os.File + config *initConfig + manager cgroups.Manager + container *linuxContainer + fds []string + process *Process + bootstrapData io.Reader + sharePidns bool + rootDir *os.File +} + +func (p *initProcess) pid() int { + return p.cmd.Process.Pid +} + +func (p *initProcess) externalDescriptors() []string { + return p.fds +} + +// execSetns runs the process that executes C code to perform the setns calls +// because setns support requires the C process to fork off a child and perform the setns +// before the go runtime boots, we wait on the process to die and receive the child's pid +// over the provided pipe. +// This is called by initProcess.start function +func (p *initProcess) execSetns() error { + status, err := p.cmd.Process.Wait() + if err != nil { + p.cmd.Wait() + return err + } + if !status.Success() { + p.cmd.Wait() + return &exec.ExitError{ProcessState: status} + } + var pid *pid + if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { + p.cmd.Wait() + return err + } + process, err := os.FindProcess(pid.Pid) + if err != nil { + return err + } + p.cmd.Process = process + p.process.ops = p + return nil +} + +func (p *initProcess) start() error { + defer p.parentPipe.Close() + err := p.cmd.Start() + p.process.ops = p + p.childPipe.Close() + p.rootDir.Close() + if err != nil { + p.process.ops = nil + return newSystemErrorWithCause(err, "starting init process command") + } + if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { + return newSystemErrorWithCause(err, "copying bootstrap data to pipe") + } + if err := p.execSetns(); err != nil { + return newSystemErrorWithCause(err, "running exec setns process for init") + } + // Save the standard descriptor names before the container process + // can potentially move them (e.g., via dup2()). If we don't do this now, + // we won't know at checkpoint time which file descriptor to look up. + fds, err := getPipeFds(p.pid()) + if err != nil { + return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) + } + p.setExternalDescriptors(fds) + // Do this before syncing with child so that no children can escape the + // cgroup. We don't need to worry about not doing this and not being root + // because we'd be using the rootless cgroup manager in that case. + if err := p.manager.Apply(p.pid()); err != nil { + return newSystemErrorWithCause(err, "applying cgroup configuration for process") + } + defer func() { + if err != nil { + // TODO: should not be the responsibility to call here + p.manager.Destroy() + } + }() + if err := p.createNetworkInterfaces(); err != nil { + return newSystemErrorWithCause(err, "creating network interfaces") + } + if err := p.sendConfig(); err != nil { + return newSystemErrorWithCause(err, "sending config to init process") + } + var ( + sentRun bool + sentResume bool + ) + + ierr := parseSync(p.parentPipe, func(sync *syncT) error { + switch sync.Type { + case procReady: + // set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return newSystemErrorWithCause(err, "setting rlimits for ready process") + } + // call prestart hooks + if !p.config.Config.Namespaces.Contains(configs.NEWNS) { + // Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions. + if err := p.manager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting cgroup config for ready process") + } + + if p.config.Config.Hooks != nil { + s := configs.HookState{ + Version: p.container.config.Version, + ID: p.container.id, + Pid: p.pid(), + Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"), + } + for i, hook := range p.config.Config.Hooks.Prestart { + if err := hook.Run(s); err != nil { + return newSystemErrorWithCausef(err, "running prestart hook %d", i) + } + } + } + } + // Sync with child. + if err := writeSync(p.parentPipe, procRun); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'run'") + } + sentRun = true + case procHooks: + // Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions. + if err := p.manager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting cgroup config for procHooks process") + } + if p.config.Config.Hooks != nil { + s := configs.HookState{ + Version: p.container.config.Version, + ID: p.container.id, + Pid: p.pid(), + Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"), + } + for i, hook := range p.config.Config.Hooks.Prestart { + if err := hook.Run(s); err != nil { + return newSystemErrorWithCausef(err, "running prestart hook %d", i) + } + } + } + // Sync with child. + if err := writeSync(p.parentPipe, procResume); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'resume'") + } + sentResume = true + default: + return newSystemError(fmt.Errorf("invalid JSON payload from child")) + } + + return nil + }) + + if !sentRun { + return newSystemErrorWithCause(ierr, "container init") + } + if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume { + return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process")) + } + if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil { + return newSystemErrorWithCause(err, "shutting down init pipe") + } + + // Must be done after Shutdown so the child will exit and we can wait for it. + if ierr != nil { + p.wait() + return ierr + } + return nil +} + +func (p *initProcess) wait() (*os.ProcessState, error) { + err := p.cmd.Wait() + if err != nil { + return p.cmd.ProcessState, err + } + // we should kill all processes in cgroup when init is died if we use host PID namespace + if p.sharePidns { + signalAllProcesses(p.manager, unix.SIGKILL) + } + return p.cmd.ProcessState, nil +} + +func (p *initProcess) terminate() error { + if p.cmd.Process == nil { + return nil + } + err := p.cmd.Process.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *initProcess) startTime() (uint64, error) { + stat, err := system.Stat(p.pid()) + return stat.StartTime, err +} + +func (p *initProcess) sendConfig() error { + // send the config to the container's init process, we don't use JSON Encode + // here because there might be a problem in JSON decoder in some cases, see: + // https://github.com/docker/docker/issues/14203#issuecomment-174177790 + return utils.WriteJSON(p.parentPipe, p.config) +} + +func (p *initProcess) createNetworkInterfaces() error { + for _, config := range p.config.Config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + n := &network{ + Network: *config, + } + if err := strategy.create(n, p.pid()); err != nil { + return err + } + p.config.Networks = append(p.config.Networks, n) + } + return nil +} + +func (p *initProcess) signal(sig os.Signal) error { + s, ok := sig.(syscall.Signal) + if !ok { + return errors.New("os: unsupported signal type") + } + return unix.Kill(p.pid(), s) +} + +func (p *initProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +func getPipeFds(pid int) ([]string, error) { + fds := make([]string, 3) + + dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") + for i := 0; i < 3; i++ { + // XXX: This breaks if the path is not a valid symlink (which can + // happen in certain particularly unlucky mount namespace setups). + f := filepath.Join(dirPath, strconv.Itoa(i)) + target, err := os.Readlink(f) + if err != nil { + // Ignore permission errors, for rootless containers and other + // non-dumpable processes. if we can't get the fd for a particular + // file, there's not much we can do. + if os.IsPermission(err) { + continue + } + return fds, err + } + fds[i] = target + } + return fds, nil +} + +// InitializeIO creates pipes for use with the process's stdio and returns the +// opposite side for each. Do not use this if you want to have a pseudoterminal +// set up for you by libcontainer (TODO: fix that too). +// TODO: This is mostly unnecessary, and should be handled by clients. +func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) { + var fds []uintptr + i = &IO{} + // cleanup in case of an error + defer func() { + if err != nil { + for _, fd := range fds { + unix.Close(int(fd)) + } + } + }() + // STDIN + r, w, err := os.Pipe() + if err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stdin, i.Stdin = r, w + // STDOUT + if r, w, err = os.Pipe(); err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stdout, i.Stdout = w, r + // STDERR + if r, w, err = os.Pipe(); err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stderr, i.Stderr = w, r + // change ownership of the pipes incase we are in a user namespace + for _, fd := range fds { + if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil { + return nil, err + } + } + return i, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go b/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go new file mode 100644 index 000000000..408916ad9 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go @@ -0,0 +1,122 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "os" + + "github.com/opencontainers/runc/libcontainer/system" +) + +func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) { + var ( + err error + ) + proc, err := os.FindProcess(pid) + if err != nil { + return nil, err + } + stat, err := system.Stat(pid) + if err != nil { + return nil, err + } + return &restoredProcess{ + proc: proc, + processStartTime: stat.StartTime, + fds: fds, + }, nil +} + +type restoredProcess struct { + proc *os.Process + processStartTime uint64 + fds []string +} + +func (p *restoredProcess) start() error { + return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) +} + +func (p *restoredProcess) pid() int { + return p.proc.Pid +} + +func (p *restoredProcess) terminate() error { + err := p.proc.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *restoredProcess) wait() (*os.ProcessState, error) { + // TODO: how do we wait on the actual process? + // maybe use --exec-cmd in criu + st, err := p.proc.Wait() + if err != nil { + return nil, err + } + return st, nil +} + +func (p *restoredProcess) startTime() (uint64, error) { + return p.processStartTime, nil +} + +func (p *restoredProcess) signal(s os.Signal) error { + return p.proc.Signal(s) +} + +func (p *restoredProcess) externalDescriptors() []string { + return p.fds +} + +func (p *restoredProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +// nonChildProcess represents a process where the calling process is not +// the parent process. This process is created when a factory loads a container from +// a persisted state. +type nonChildProcess struct { + processPid int + processStartTime uint64 + fds []string +} + +func (p *nonChildProcess) start() error { + return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) +} + +func (p *nonChildProcess) pid() int { + return p.processPid +} + +func (p *nonChildProcess) terminate() error { + return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError) +} + +func (p *nonChildProcess) wait() (*os.ProcessState, error) { + return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) +} + +func (p *nonChildProcess) startTime() (uint64, error) { + return p.processStartTime, nil +} + +func (p *nonChildProcess) signal(s os.Signal) error { + proc, err := os.FindProcess(p.processPid) + if err != nil { + return err + } + return proc.Signal(s) +} + +func (p *nonChildProcess) externalDescriptors() []string { + return p.fds +} + +func (p *nonChildProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go new file mode 100644 index 000000000..e2e734a85 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go @@ -0,0 +1,812 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path" + "path/filepath" + "strings" + "time" + + "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/symlink" + "github.com/mrunalp/fileutils" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/selinux/go-selinux/label" + + "golang.org/x/sys/unix" +) + +const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV + +// needsSetupDev returns true if /dev needs to be set up. +func needsSetupDev(config *configs.Config) bool { + for _, m := range config.Mounts { + if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" { + return false + } + } + return true +} + +// prepareRootfs sets up the devices, mount points, and filesystems for use +// inside a new mount namespace. It doesn't set anything as ro. You must call +// finalizeRootfs after this function to finish setting up the rootfs. +func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) { + if err := prepareRoot(config); err != nil { + return newSystemErrorWithCause(err, "preparing rootfs") + } + + setupDev := needsSetupDev(config) + for _, m := range config.Mounts { + for _, precmd := range m.PremountCmds { + if err := mountCmd(precmd); err != nil { + return newSystemErrorWithCause(err, "running premount command") + } + } + + if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil { + return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination) + } + + for _, postcmd := range m.PostmountCmds { + if err := mountCmd(postcmd); err != nil { + return newSystemErrorWithCause(err, "running postmount command") + } + } + } + + if setupDev { + if err := createDevices(config); err != nil { + return newSystemErrorWithCause(err, "creating device nodes") + } + if err := setupPtmx(config); err != nil { + return newSystemErrorWithCause(err, "setting up ptmx") + } + if err := setupDevSymlinks(config.Rootfs); err != nil { + return newSystemErrorWithCause(err, "setting up /dev symlinks") + } + } + + // Signal the parent to run the pre-start hooks. + // The hooks are run after the mounts are setup, but before we switch to the new + // root, so that the old root is still available in the hooks for any mount + // manipulations. + if err := syncParentHooks(pipe); err != nil { + return err + } + + // The reason these operations are done here rather than in finalizeRootfs + // is because the console-handling code gets quite sticky if we have to set + // up the console before doing the pivot_root(2). This is because the + // Console API has to also work with the ExecIn case, which means that the + // API must be able to deal with being inside as well as outside the + // container. It's just cleaner to do this here (at the expense of the + // operation not being perfectly split). + + if err := unix.Chdir(config.Rootfs); err != nil { + return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs) + } + + if config.NoPivotRoot { + err = msMoveRoot(config.Rootfs) + } else { + err = pivotRoot(config.Rootfs) + } + if err != nil { + return newSystemErrorWithCause(err, "jailing process inside rootfs") + } + + if setupDev { + if err := reOpenDevNull(); err != nil { + return newSystemErrorWithCause(err, "reopening /dev/null inside container") + } + } + + return nil +} + +// finalizeRootfs sets anything to ro if necessary. You must call +// prepareRootfs first. +func finalizeRootfs(config *configs.Config) (err error) { + // remount dev as ro if specified + for _, m := range config.Mounts { + if libcontainerUtils.CleanPath(m.Destination) == "/dev" { + if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY { + if err := remountReadonly(m); err != nil { + return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination) + } + } + break + } + } + + // set rootfs ( / ) as readonly + if config.Readonlyfs { + if err := setReadonly(); err != nil { + return newSystemErrorWithCause(err, "setting rootfs as readonly") + } + } + + unix.Umask(0022) + return nil +} + +func mountCmd(cmd configs.Command) error { + command := exec.Command(cmd.Path, cmd.Args[:]...) + command.Env = cmd.Env + command.Dir = cmd.Dir + if out, err := command.CombinedOutput(); err != nil { + return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err) + } + return nil +} + +func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { + var ( + dest = m.Destination + ) + if !strings.HasPrefix(dest, rootfs) { + dest = filepath.Join(rootfs, dest) + } + + switch m.Device { + case "proc", "sysfs": + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + // Selinux kernels do not support labeling of /proc or /sys + return mountPropagate(m, rootfs, "") + case "mqueue": + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + if err := mountPropagate(m, rootfs, mountLabel); err != nil { + // older kernels do not support labeling of /dev/mqueue + if err := mountPropagate(m, rootfs, ""); err != nil { + return err + } + return label.SetFileLabel(dest, mountLabel) + } + return nil + case "tmpfs": + copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP + tmpDir := "" + stat, err := os.Stat(dest) + if err != nil { + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + } + if copyUp { + tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir") + if err != nil { + return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir") + } + defer os.RemoveAll(tmpDir) + m.Destination = tmpDir + } + if err := mountPropagate(m, rootfs, mountLabel); err != nil { + return err + } + if copyUp { + if err := fileutils.CopyDirectory(dest, tmpDir); err != nil { + errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err) + if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil { + return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) + } + return errMsg + } + if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil { + errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err) + if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil { + return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) + } + return errMsg + } + } + if stat != nil { + if err = os.Chmod(dest, stat.Mode()); err != nil { + return err + } + } + return nil + case "bind": + stat, err := os.Stat(m.Source) + if err != nil { + // error out if the source of a bind mount does not exist as we will be + // unable to bind anything to it. + return err + } + // ensure that the destination of the bind mount is resolved of symlinks at mount time because + // any previous mounts can invalidate the next mount's destination. + // this can happen when a user specifies mounts within other mounts to cause breakouts or other + // evil stuff to try to escape the container's rootfs. + if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { + return err + } + if err := checkMountDestination(rootfs, dest); err != nil { + return err + } + // update the mount with the correct dest after symlinks are resolved. + m.Destination = dest + if err := createIfNotExists(dest, stat.IsDir()); err != nil { + return err + } + if err := mountPropagate(m, rootfs, mountLabel); err != nil { + return err + } + // bind mount won't change mount options, we need remount to make mount options effective. + // first check that we have non-default options required before attempting a remount + if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 { + // only remount if unique mount options are set + if err := remount(m, rootfs); err != nil { + return err + } + } + + if m.Relabel != "" { + if err := label.Validate(m.Relabel); err != nil { + return err + } + shared := label.IsShared(m.Relabel) + if err := label.Relabel(m.Source, mountLabel, shared); err != nil { + return err + } + } + case "cgroup": + binds, err := getCgroupMounts(m) + if err != nil { + return err + } + var merged []string + for _, b := range binds { + ss := filepath.Base(b.Destination) + if strings.Contains(ss, ",") { + merged = append(merged, ss) + } + } + tmpfs := &configs.Mount{ + Source: "tmpfs", + Device: "tmpfs", + Destination: m.Destination, + Flags: defaultMountFlags, + Data: "mode=755", + PropagationFlags: m.PropagationFlags, + } + if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil { + return err + } + for _, b := range binds { + if err := mountToRootfs(b, rootfs, mountLabel); err != nil { + return err + } + } + for _, mc := range merged { + for _, ss := range strings.Split(mc, ",") { + // symlink(2) is very dumb, it will just shove the path into + // the link and doesn't do any checks or relative path + // conversion. Also, don't error out if the cgroup already exists. + if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) { + return err + } + } + } + if m.Flags&unix.MS_RDONLY != 0 { + // remount cgroup root as readonly + mcgrouproot := &configs.Mount{ + Source: m.Destination, + Device: "bind", + Destination: m.Destination, + Flags: defaultMountFlags | unix.MS_RDONLY | unix.MS_BIND, + } + if err := remount(mcgrouproot, rootfs); err != nil { + return err + } + } + default: + // ensure that the destination of the mount is resolved of symlinks at mount time because + // any previous mounts can invalidate the next mount's destination. + // this can happen when a user specifies mounts within other mounts to cause breakouts or other + // evil stuff to try to escape the container's rootfs. + var err error + if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { + return err + } + if err := checkMountDestination(rootfs, dest); err != nil { + return err + } + // update the mount with the correct dest after symlinks are resolved. + m.Destination = dest + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + return mountPropagate(m, rootfs, mountLabel) + } + return nil +} + +func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { + mounts, err := cgroups.GetCgroupMounts(false) + if err != nil { + return nil, err + } + + cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + var binds []*configs.Mount + + for _, mm := range mounts { + dir, err := mm.GetOwnCgroup(cgroupPaths) + if err != nil { + return nil, err + } + relDir, err := filepath.Rel(mm.Root, dir) + if err != nil { + return nil, err + } + binds = append(binds, &configs.Mount{ + Device: "bind", + Source: filepath.Join(mm.Mountpoint, relDir), + Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)), + Flags: unix.MS_BIND | unix.MS_REC | m.Flags, + PropagationFlags: m.PropagationFlags, + }) + } + + return binds, nil +} + +// checkMountDestination checks to ensure that the mount destination is not over the top of /proc. +// dest is required to be an abs path and have any symlinks resolved before calling this function. +func checkMountDestination(rootfs, dest string) error { + invalidDestinations := []string{ + "/proc", + } + // White list, it should be sub directories of invalid destinations + validDestinations := []string{ + // These entries can be bind mounted by files emulated by fuse, + // so commands like top, free displays stats in container. + "/proc/cpuinfo", + "/proc/diskstats", + "/proc/meminfo", + "/proc/stat", + "/proc/swaps", + "/proc/uptime", + "/proc/net/dev", + } + for _, valid := range validDestinations { + path, err := filepath.Rel(filepath.Join(rootfs, valid), dest) + if err != nil { + return err + } + if path == "." { + return nil + } + } + for _, invalid := range invalidDestinations { + path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest) + if err != nil { + return err + } + if path == "." || !strings.HasPrefix(path, "..") { + return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid) + } + } + return nil +} + +func setupDevSymlinks(rootfs string) error { + var links = [][2]string{ + {"/proc/self/fd", "/dev/fd"}, + {"/proc/self/fd/0", "/dev/stdin"}, + {"/proc/self/fd/1", "/dev/stdout"}, + {"/proc/self/fd/2", "/dev/stderr"}, + } + // kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink + // in /dev if it exists in /proc. + if _, err := os.Stat("/proc/kcore"); err == nil { + links = append(links, [2]string{"/proc/kcore", "/dev/core"}) + } + for _, link := range links { + var ( + src = link[0] + dst = filepath.Join(rootfs, link[1]) + ) + if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) { + return fmt.Errorf("symlink %s %s %s", src, dst, err) + } + } + return nil +} + +// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs +// this method will make them point to `/dev/null` in this container's rootfs. This +// needs to be called after we chroot/pivot into the container's rootfs so that any +// symlinks are resolved locally. +func reOpenDevNull() error { + var stat, devNullStat unix.Stat_t + file, err := os.OpenFile("/dev/null", os.O_RDWR, 0) + if err != nil { + return fmt.Errorf("Failed to open /dev/null - %s", err) + } + defer file.Close() + if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil { + return err + } + for fd := 0; fd < 3; fd++ { + if err := unix.Fstat(fd, &stat); err != nil { + return err + } + if stat.Rdev == devNullStat.Rdev { + // Close and re-open the fd. + if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil { + return err + } + } + } + return nil +} + +// Create the device nodes in the container. +func createDevices(config *configs.Config) error { + useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER) + oldMask := unix.Umask(0000) + for _, node := range config.Devices { + // containers running in a user namespace are not allowed to mknod + // devices so we can just bind mount it from the host. + if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil { + unix.Umask(oldMask) + return err + } + } + unix.Umask(oldMask) + return nil +} + +func bindMountDeviceNode(dest string, node *configs.Device) error { + f, err := os.Create(dest) + if err != nil && !os.IsExist(err) { + return err + } + if f != nil { + f.Close() + } + return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "") +} + +// Creates the device node in the rootfs of the container. +func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { + dest := filepath.Join(rootfs, node.Path) + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + return err + } + + if bind { + return bindMountDeviceNode(dest, node) + } + if err := mknodDevice(dest, node); err != nil { + if os.IsExist(err) { + return nil + } else if os.IsPermission(err) { + return bindMountDeviceNode(dest, node) + } + return err + } + return nil +} + +func mknodDevice(dest string, node *configs.Device) error { + fileMode := node.FileMode + switch node.Type { + case 'c', 'u': + fileMode |= unix.S_IFCHR + case 'b': + fileMode |= unix.S_IFBLK + case 'p': + fileMode |= unix.S_IFIFO + default: + return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) + } + if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil { + return err + } + return unix.Chown(dest, int(node.Uid), int(node.Gid)) +} + +func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { + for _, m := range mountinfo { + if m.Mountpoint == dir { + return m + } + } + return nil +} + +// Get the parent mount point of directory passed in as argument. Also return +// optional fields. +func getParentMount(rootfs string) (string, string, error) { + var path string + + mountinfos, err := mount.GetMounts() + if err != nil { + return "", "", err + } + + mountinfo := getMountInfo(mountinfos, rootfs) + if mountinfo != nil { + return rootfs, mountinfo.Optional, nil + } + + path = rootfs + for { + path = filepath.Dir(path) + + mountinfo = getMountInfo(mountinfos, path) + if mountinfo != nil { + return path, mountinfo.Optional, nil + } + + if path == "/" { + break + } + } + + // If we are here, we did not find parent mount. Something is wrong. + return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs) +} + +// Make parent mount private if it was shared +func rootfsParentMountPrivate(rootfs string) error { + sharedMount := false + + parentMount, optionalOpts, err := getParentMount(rootfs) + if err != nil { + return err + } + + optsSplit := strings.Split(optionalOpts, " ") + for _, opt := range optsSplit { + if strings.HasPrefix(opt, "shared:") { + sharedMount = true + break + } + } + + // Make parent mount PRIVATE if it was shared. It is needed for two + // reasons. First of all pivot_root() will fail if parent mount is + // shared. Secondly when we bind mount rootfs it will propagate to + // parent namespace and we don't want that to happen. + if sharedMount { + return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "") + } + + return nil +} + +func prepareRoot(config *configs.Config) error { + flag := unix.MS_SLAVE | unix.MS_REC + if config.RootPropagation != 0 { + flag = config.RootPropagation + } + if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil { + return err + } + + // Make parent mount private to make sure following bind mount does + // not propagate in other namespaces. Also it will help with kernel + // check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent)) + if err := rootfsParentMountPrivate(config.Rootfs); err != nil { + return err + } + + return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "") +} + +func setReadonly() error { + return unix.Mount("/", "/", "bind", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "") +} + +func setupPtmx(config *configs.Config) error { + ptmx := filepath.Join(config.Rootfs, "dev/ptmx") + if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink("pts/ptmx", ptmx); err != nil { + return fmt.Errorf("symlink dev ptmx %s", err) + } + return nil +} + +// pivotRoot will call pivot_root such that rootfs becomes the new root +// filesystem, and everything else is cleaned up. +func pivotRoot(rootfs string) error { + // While the documentation may claim otherwise, pivot_root(".", ".") is + // actually valid. What this results in is / being the new root but + // /proc/self/cwd being the old root. Since we can play around with the cwd + // with pivot_root this allows us to pivot without creating directories in + // the rootfs. Shout-outs to the LXC developers for giving us this idea. + + oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return err + } + defer unix.Close(oldroot) + + newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return err + } + defer unix.Close(newroot) + + // Change to the new root so that the pivot_root actually acts on it. + if err := unix.Fchdir(newroot); err != nil { + return err + } + + if err := unix.PivotRoot(".", "."); err != nil { + return fmt.Errorf("pivot_root %s", err) + } + + // Currently our "." is oldroot (according to the current kernel code). + // However, purely for safety, we will fchdir(oldroot) since there isn't + // really any guarantee from the kernel what /proc/self/cwd will be after a + // pivot_root(2). + + if err := unix.Fchdir(oldroot); err != nil { + return err + } + + // Make oldroot rprivate to make sure our unmounts don't propagate to the + // host (and thus bork the machine). + if err := unix.Mount("", ".", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil { + return err + } + // Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. + if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { + return err + } + + // Switch back to our shiny new root. + if err := unix.Chdir("/"); err != nil { + return fmt.Errorf("chdir / %s", err) + } + return nil +} + +func msMoveRoot(rootfs string) error { + if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil { + return err + } + if err := unix.Chroot("."); err != nil { + return err + } + return unix.Chdir("/") +} + +// createIfNotExists creates a file or a directory only if it does not already exist. +func createIfNotExists(path string, isDir bool) error { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + if isDir { + return os.MkdirAll(path, 0755) + } + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return err + } + f, err := os.OpenFile(path, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + return nil +} + +// readonlyPath will make a path read only. +func readonlyPath(path string) error { + if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "") +} + +// remountReadonly will remount an existing mount point and ensure that it is read-only. +func remountReadonly(m *configs.Mount) error { + var ( + dest = m.Destination + flags = m.Flags + ) + for i := 0; i < 5; i++ { + if err := unix.Mount("", dest, "", uintptr(flags|unix.MS_REMOUNT|unix.MS_RDONLY), ""); err != nil { + switch err { + case unix.EBUSY: + time.Sleep(100 * time.Millisecond) + continue + default: + return err + } + } + return nil + } + return fmt.Errorf("unable to mount %s as readonly max retries reached", dest) +} + +// maskPath masks the top of the specified path inside a container to avoid +// security issues from processes reading information from non-namespace aware +// mounts ( proc/kcore ). +// For files, maskPath bind mounts /dev/null over the top of the specified path. +// For directories, maskPath mounts read-only tmpfs over the top of the specified path. +func maskPath(path string) error { + if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) { + if err == unix.ENOTDIR { + return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, "") + } + return err + } + return nil +} + +// writeSystemProperty writes the value to a path under /proc/sys as determined from the key. +// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward. +func writeSystemProperty(key, value string) error { + keyPath := strings.Replace(key, ".", "/", -1) + return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644) +} + +func remount(m *configs.Mount, rootfs string) error { + var ( + dest = m.Destination + ) + if !strings.HasPrefix(dest, rootfs) { + dest = filepath.Join(rootfs, dest) + } + if err := unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), ""); err != nil { + return err + } + return nil +} + +// Do the mount operation followed by additional mounts required to take care +// of propagation flags. +func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error { + var ( + dest = m.Destination + data = label.FormatMountLabel(m.Data, mountLabel) + flags = m.Flags + ) + if libcontainerUtils.CleanPath(dest) == "/dev" { + flags &= ^unix.MS_RDONLY + } + + copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP + if !(copyUp || strings.HasPrefix(dest, rootfs)) { + dest = filepath.Join(rootfs, dest) + } + + if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil { + return err + } + + for _, pflag := range m.PropagationFlags { + if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go new file mode 100644 index 000000000..ded5a6bbc --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go @@ -0,0 +1,76 @@ +package seccomp + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var operators = map[string]configs.Operator{ + "SCMP_CMP_NE": configs.NotEqualTo, + "SCMP_CMP_LT": configs.LessThan, + "SCMP_CMP_LE": configs.LessThanOrEqualTo, + "SCMP_CMP_EQ": configs.EqualTo, + "SCMP_CMP_GE": configs.GreaterThanOrEqualTo, + "SCMP_CMP_GT": configs.GreaterThan, + "SCMP_CMP_MASKED_EQ": configs.MaskEqualTo, +} + +var actions = map[string]configs.Action{ + "SCMP_ACT_KILL": configs.Kill, + "SCMP_ACT_ERRNO": configs.Errno, + "SCMP_ACT_TRAP": configs.Trap, + "SCMP_ACT_ALLOW": configs.Allow, + "SCMP_ACT_TRACE": configs.Trace, +} + +var archs = map[string]string{ + "SCMP_ARCH_X86": "x86", + "SCMP_ARCH_X86_64": "amd64", + "SCMP_ARCH_X32": "x32", + "SCMP_ARCH_ARM": "arm", + "SCMP_ARCH_AARCH64": "arm64", + "SCMP_ARCH_MIPS": "mips", + "SCMP_ARCH_MIPS64": "mips64", + "SCMP_ARCH_MIPS64N32": "mips64n32", + "SCMP_ARCH_MIPSEL": "mipsel", + "SCMP_ARCH_MIPSEL64": "mipsel64", + "SCMP_ARCH_MIPSEL64N32": "mipsel64n32", + "SCMP_ARCH_PPC": "ppc", + "SCMP_ARCH_PPC64": "ppc64", + "SCMP_ARCH_PPC64LE": "ppc64le", + "SCMP_ARCH_S390": "s390", + "SCMP_ARCH_S390X": "s390x", +} + +// ConvertStringToOperator converts a string into a Seccomp comparison operator. +// Comparison operators use the names they are assigned by Libseccomp's header. +// Attempting to convert a string that is not a valid operator results in an +// error. +func ConvertStringToOperator(in string) (configs.Operator, error) { + if op, ok := operators[in]; ok == true { + return op, nil + } + return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in) +} + +// ConvertStringToAction converts a string into a Seccomp rule match action. +// Actions use the names they are assigned in Libseccomp's header, though some +// (notable, SCMP_ACT_TRACE) are not available in this implementation and will +// return errors. +// Attempting to convert a string that is not a valid action results in an +// error. +func ConvertStringToAction(in string) (configs.Action, error) { + if act, ok := actions[in]; ok == true { + return act, nil + } + return 0, fmt.Errorf("string %s is not a valid action for seccomp", in) +} + +// ConvertStringToArch converts a string into a Seccomp comparison arch. +func ConvertStringToArch(in string) (string, error) { + if arch, ok := archs[in]; ok == true { + return arch, nil + } + return "", fmt.Errorf("string %s is not a valid arch for seccomp", in) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go new file mode 100644 index 000000000..2523cbf99 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go @@ -0,0 +1,227 @@ +// +build linux,cgo,seccomp + +package seccomp + +import ( + "bufio" + "fmt" + "os" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + libseccomp "github.com/seccomp/libseccomp-golang" + + "golang.org/x/sys/unix" +) + +var ( + actAllow = libseccomp.ActAllow + actTrap = libseccomp.ActTrap + actKill = libseccomp.ActKill + actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM)) + actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) +) + +// Filters given syscalls in a container, preventing them from being used +// Started in the container init process, and carried over to all child processes +// Setns calls, however, require a separate invocation, as they are not children +// of the init until they join the namespace +func InitSeccomp(config *configs.Seccomp) error { + if config == nil { + return fmt.Errorf("cannot initialize Seccomp - nil config passed") + } + + defaultAction, err := getAction(config.DefaultAction) + if err != nil { + return fmt.Errorf("error initializing seccomp - invalid default action") + } + + filter, err := libseccomp.NewFilter(defaultAction) + if err != nil { + return fmt.Errorf("error creating filter: %s", err) + } + + // Add extra architectures + for _, arch := range config.Architectures { + scmpArch, err := libseccomp.GetArchFromString(arch) + if err != nil { + return err + } + + if err := filter.AddArch(scmpArch); err != nil { + return err + } + } + + // Unset no new privs bit + if err := filter.SetNoNewPrivsBit(false); err != nil { + return fmt.Errorf("error setting no new privileges: %s", err) + } + + // Add a rule for each syscall + for _, call := range config.Syscalls { + if call == nil { + return fmt.Errorf("encountered nil syscall while initializing Seccomp") + } + + if err = matchCall(filter, call); err != nil { + return err + } + } + + if err = filter.Load(); err != nil { + return fmt.Errorf("error loading seccomp filter into kernel: %s", err) + } + + return nil +} + +// IsEnabled returns if the kernel has been configured to support seccomp. +func IsEnabled() bool { + // Try to read from /proc/self/status for kernels > 3.8 + s, err := parseStatusFile("/proc/self/status") + if err != nil { + // Check if Seccomp is supported, via CONFIG_SECCOMP. + if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { + // Make sure the kernel has CONFIG_SECCOMP_FILTER. + if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { + return true + } + } + return false + } + _, ok := s["Seccomp"] + return ok +} + +// Convert Libcontainer Action to Libseccomp ScmpAction +func getAction(act configs.Action) (libseccomp.ScmpAction, error) { + switch act { + case configs.Kill: + return actKill, nil + case configs.Errno: + return actErrno, nil + case configs.Trap: + return actTrap, nil + case configs.Allow: + return actAllow, nil + case configs.Trace: + return actTrace, nil + default: + return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule") + } +} + +// Convert Libcontainer Operator to Libseccomp ScmpCompareOp +func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { + switch op { + case configs.EqualTo: + return libseccomp.CompareEqual, nil + case configs.NotEqualTo: + return libseccomp.CompareNotEqual, nil + case configs.GreaterThan: + return libseccomp.CompareGreater, nil + case configs.GreaterThanOrEqualTo: + return libseccomp.CompareGreaterEqual, nil + case configs.LessThan: + return libseccomp.CompareLess, nil + case configs.LessThanOrEqualTo: + return libseccomp.CompareLessOrEqual, nil + case configs.MaskEqualTo: + return libseccomp.CompareMaskedEqual, nil + default: + return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule") + } +} + +// Convert Libcontainer Arg to Libseccomp ScmpCondition +func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { + cond := libseccomp.ScmpCondition{} + + if arg == nil { + return cond, fmt.Errorf("cannot convert nil to syscall condition") + } + + op, err := getOperator(arg.Op) + if err != nil { + return cond, err + } + + return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) +} + +// Add a rule to match a single syscall +func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { + if call == nil || filter == nil { + return fmt.Errorf("cannot use nil as syscall to block") + } + + if len(call.Name) == 0 { + return fmt.Errorf("empty string is not a valid syscall") + } + + // If we can't resolve the syscall, assume it's not supported on this kernel + // Ignore it, don't error out + callNum, err := libseccomp.GetSyscallFromName(call.Name) + if err != nil { + return nil + } + + // Convert the call's action to the libseccomp equivalent + callAct, err := getAction(call.Action) + if err != nil { + return err + } + + // Unconditional match - just add the rule + if len(call.Args) == 0 { + if err = filter.AddRule(callNum, callAct); err != nil { + return err + } + } else { + // Conditional match - convert the per-arg rules into library format + conditions := []libseccomp.ScmpCondition{} + + for _, cond := range call.Args { + newCond, err := getCondition(cond) + if err != nil { + return err + } + + conditions = append(conditions, newCond) + } + + if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil { + return err + } + } + + return nil +} + +func parseStatusFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + s := bufio.NewScanner(f) + status := make(map[string]string) + + for s.Scan() { + text := s.Text() + parts := strings.Split(text, ":") + + if len(parts) <= 1 { + continue + } + + status[parts[0]] = parts[1] + } + if err := s.Err(); err != nil { + return nil, err + } + + return status, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go new file mode 100644 index 000000000..44df1ad4c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go @@ -0,0 +1,24 @@ +// +build !linux !cgo !seccomp + +package seccomp + +import ( + "errors" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported") + +// InitSeccomp does nothing because seccomp is not supported. +func InitSeccomp(config *configs.Seccomp) error { + if config != nil { + return ErrSeccompNotEnabled + } + return nil +} + +// IsEnabled returns false, because it is not supported. +func IsEnabled() bool { + return false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go new file mode 100644 index 000000000..c7bdb605a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go @@ -0,0 +1,11 @@ +// +build linux,go1.5 + +package libcontainer + +import "syscall" + +// Set the GidMappingsEnableSetgroups member to true, so the process's +// setgroups proc entry wont be set to 'deny' if GidMappings are set +func enableSetgroups(sys *syscall.SysProcAttr) { + sys.GidMappingsEnableSetgroups = true +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go new file mode 100644 index 000000000..35b84219c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go @@ -0,0 +1,65 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "os" + + "github.com/opencontainers/runc/libcontainer/apparmor" + "github.com/opencontainers/runc/libcontainer/keys" + "github.com/opencontainers/runc/libcontainer/seccomp" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/selinux/go-selinux/label" + + "golang.org/x/sys/unix" +) + +// linuxSetnsInit performs the container's initialization for running a new process +// inside an existing container. +type linuxSetnsInit struct { + pipe *os.File + consoleSocket *os.File + config *initConfig +} + +func (l *linuxSetnsInit) getSessionRingName() string { + return fmt.Sprintf("_ses.%s", l.config.ContainerId) +} + +func (l *linuxSetnsInit) Init() error { + if !l.config.Config.NoNewKeyring { + // do not inherit the parent's session keyring + if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil { + return err + } + } + if l.config.CreateConsole { + if err := setupConsole(l.consoleSocket, l.config, false); err != nil { + return err + } + if err := system.Setctty(); err != nil { + return err + } + } + if l.config.NoNewPrivileges { + if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { + return err + } + } + if l.config.Config.Seccomp != nil { + if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { + return err + } + } + if err := finalizeNamespace(l.config); err != nil { + return err + } + if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + return err + } + if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { + return err + } + return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go new file mode 100644 index 000000000..0bbe14950 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go @@ -0,0 +1,27 @@ +package stacktrace + +import "runtime" + +// Capture captures a stacktrace for the current calling go program +// +// skip is the number of frames to skip +func Capture(userSkip int) Stacktrace { + var ( + skip = userSkip + 1 // add one for our own function + frames []Frame + prevPc uintptr + ) + for i := skip; ; i++ { + pc, file, line, ok := runtime.Caller(i) + //detect if caller is repeated to avoid loop, gccgo + //currently runs into a loop without this check + if !ok || pc == prevPc { + break + } + frames = append(frames, NewFrame(pc, file, line)) + prevPc = pc + } + return Stacktrace{ + Frames: frames, + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go new file mode 100644 index 000000000..0d590d9a5 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go @@ -0,0 +1,38 @@ +package stacktrace + +import ( + "path/filepath" + "runtime" + "strings" +) + +// NewFrame returns a new stack frame for the provided information +func NewFrame(pc uintptr, file string, line int) Frame { + fn := runtime.FuncForPC(pc) + if fn == nil { + return Frame{} + } + pack, name := parseFunctionName(fn.Name()) + return Frame{ + Line: line, + File: filepath.Base(file), + Package: pack, + Function: name, + } +} + +func parseFunctionName(name string) (string, string) { + i := strings.LastIndex(name, ".") + if i == -1 { + return "", name + } + return name[:i], name[i+1:] +} + +// Frame contains all the information for a stack frame within a go program +type Frame struct { + File string + Function string + Package string + Line int +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go new file mode 100644 index 000000000..5e8b58d2d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go @@ -0,0 +1,5 @@ +package stacktrace + +type Stacktrace struct { + Frames []Frame +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go new file mode 100644 index 000000000..580b3fe45 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go @@ -0,0 +1,188 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "os" + "os/exec" + "syscall" //only for Exec + + "github.com/opencontainers/runc/libcontainer/apparmor" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/keys" + "github.com/opencontainers/runc/libcontainer/seccomp" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/selinux/go-selinux/label" + + "golang.org/x/sys/unix" +) + +type linuxStandardInit struct { + pipe *os.File + consoleSocket *os.File + parentPid int + stateDirFD int + config *initConfig +} + +func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { + var newperms uint32 + + if l.config.Config.Namespaces.Contains(configs.NEWUSER) { + // with user ns we need 'other' search permissions + newperms = 0x8 + } else { + // without user ns we need 'UID' search permissions + newperms = 0x80000 + } + + // create a unique per session container name that we can + // join in setns; however, other containers can also join it + return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms +} + +func (l *linuxStandardInit) Init() error { + if !l.config.Config.NoNewKeyring { + ringname, keepperms, newperms := l.getSessionRingParams() + + // do not inherit the parent's session keyring + sessKeyId, err := keys.JoinSessionKeyring(ringname) + if err != nil { + return err + } + // make session keyring searcheable + if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { + return err + } + } + + if err := setupNetwork(l.config); err != nil { + return err + } + if err := setupRoute(l.config.Config); err != nil { + return err + } + + label.Init() + + // prepareRootfs() can be executed only for a new mount namespace. + if l.config.Config.Namespaces.Contains(configs.NEWNS) { + if err := prepareRootfs(l.pipe, l.config.Config); err != nil { + return err + } + } + + // Set up the console. This has to be done *before* we finalize the rootfs, + // but *after* we've given the user the chance to set up all of the mounts + // they wanted. + if l.config.CreateConsole { + if err := setupConsole(l.consoleSocket, l.config, true); err != nil { + return err + } + if err := system.Setctty(); err != nil { + return err + } + } + + // Finish the rootfs setup. + if l.config.Config.Namespaces.Contains(configs.NEWNS) { + if err := finalizeRootfs(l.config.Config); err != nil { + return err + } + } + + if hostname := l.config.Config.Hostname; hostname != "" { + if err := unix.Sethostname([]byte(hostname)); err != nil { + return err + } + } + if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + return err + } + if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { + return err + } + + for key, value := range l.config.Config.Sysctl { + if err := writeSystemProperty(key, value); err != nil { + return err + } + } + for _, path := range l.config.Config.ReadonlyPaths { + if err := readonlyPath(path); err != nil { + return err + } + } + for _, path := range l.config.Config.MaskPaths { + if err := maskPath(path); err != nil { + return err + } + } + pdeath, err := system.GetParentDeathSignal() + if err != nil { + return err + } + if l.config.NoNewPrivileges { + if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { + return err + } + } + // Tell our parent that we're ready to Execv. This must be done before the + // Seccomp rules have been applied, because we need to be able to read and + // write to a socket. + if err := syncParentReady(l.pipe); err != nil { + return err + } + // Without NoNewPrivileges seccomp is a privileged operation, so we need to + // do this before dropping capabilities; otherwise do it as late as possible + // just before execve so as few syscalls take place after it as possible. + if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { + if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { + return err + } + } + if err := finalizeNamespace(l.config); err != nil { + return err + } + // finalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. + if err := pdeath.Restore(); err != nil { + return err + } + // compare the parent from the initial start of the init process and make sure that it did not change. + // if the parent changes that means it died and we were reparented to something else so we should + // just kill ourself and not cause problems for someone else. + if unix.Getppid() != l.parentPid { + return unix.Kill(unix.Getpid(), unix.SIGKILL) + } + // check for the arg before waiting to make sure it exists and it is returned + // as a create time error. + name, err := exec.LookPath(l.config.Args[0]) + if err != nil { + return err + } + // close the pipe to signal that we have completed our init. + l.pipe.Close() + // wait for the fifo to be opened on the other side before + // exec'ing the users process. + fd, err := unix.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|unix.O_CLOEXEC, 0) + if err != nil { + return newSystemErrorWithCause(err, "openat exec fifo") + } + if _, err := unix.Write(fd, []byte("0")); err != nil { + return newSystemErrorWithCause(err, "write 0 exec fifo") + } + if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { + if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { + return newSystemErrorWithCause(err, "init seccomp") + } + } + // close the statedir fd before exec because the kernel resets dumpable in the wrong order + // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318 + unix.Close(l.stateDirFD) + if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil { + return newSystemErrorWithCause(err, "exec user process") + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go new file mode 100644 index 000000000..44fa6b43a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go @@ -0,0 +1,248 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/utils" + + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +func newStateTransitionError(from, to containerState) error { + return &stateTransitionError{ + From: from.status().String(), + To: to.status().String(), + } +} + +// stateTransitionError is returned when an invalid state transition happens from one +// state to another. +type stateTransitionError struct { + From string + To string +} + +func (s *stateTransitionError) Error() string { + return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To) +} + +type containerState interface { + transition(containerState) error + destroy() error + status() Status +} + +func destroy(c *linuxContainer) error { + if !c.config.Namespaces.Contains(configs.NEWPID) { + if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil { + logrus.Warn(err) + } + } + err := c.cgroupManager.Destroy() + if rerr := os.RemoveAll(c.root); err == nil { + err = rerr + } + c.initProcess = nil + if herr := runPoststopHooks(c); err == nil { + err = herr + } + c.state = &stoppedState{c: c} + return err +} + +func runPoststopHooks(c *linuxContainer) error { + if c.config.Hooks != nil { + s := configs.HookState{ + Version: c.config.Version, + ID: c.id, + Bundle: utils.SearchLabels(c.config.Labels, "bundle"), + } + for _, hook := range c.config.Hooks.Poststop { + if err := hook.Run(s); err != nil { + return err + } + } + } + return nil +} + +// stoppedState represents a container is a stopped/destroyed state. +type stoppedState struct { + c *linuxContainer +} + +func (b *stoppedState) status() Status { + return Stopped +} + +func (b *stoppedState) transition(s containerState) error { + switch s.(type) { + case *runningState, *restoredState: + b.c.state = s + return nil + case *stoppedState: + return nil + } + return newStateTransitionError(b, s) +} + +func (b *stoppedState) destroy() error { + return destroy(b.c) +} + +// runningState represents a container that is currently running. +type runningState struct { + c *linuxContainer +} + +func (r *runningState) status() Status { + return Running +} + +func (r *runningState) transition(s containerState) error { + switch s.(type) { + case *stoppedState: + t, err := r.c.runType() + if err != nil { + return err + } + if t == Running { + return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped) + } + r.c.state = s + return nil + case *pausedState: + r.c.state = s + return nil + case *runningState: + return nil + } + return newStateTransitionError(r, s) +} + +func (r *runningState) destroy() error { + t, err := r.c.runType() + if err != nil { + return err + } + if t == Running { + return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped) + } + return destroy(r.c) +} + +type createdState struct { + c *linuxContainer +} + +func (i *createdState) status() Status { + return Created +} + +func (i *createdState) transition(s containerState) error { + switch s.(type) { + case *runningState, *pausedState, *stoppedState: + i.c.state = s + return nil + case *createdState: + return nil + } + return newStateTransitionError(i, s) +} + +func (i *createdState) destroy() error { + i.c.initProcess.signal(unix.SIGKILL) + return destroy(i.c) +} + +// pausedState represents a container that is currently pause. It cannot be destroyed in a +// paused state and must transition back to running first. +type pausedState struct { + c *linuxContainer +} + +func (p *pausedState) status() Status { + return Paused +} + +func (p *pausedState) transition(s containerState) error { + switch s.(type) { + case *runningState, *stoppedState: + p.c.state = s + return nil + case *pausedState: + return nil + } + return newStateTransitionError(p, s) +} + +func (p *pausedState) destroy() error { + t, err := p.c.runType() + if err != nil { + return err + } + if t != Running && t != Created { + if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil { + return err + } + return destroy(p.c) + } + return newGenericError(fmt.Errorf("container is paused"), ContainerPaused) +} + +// restoredState is the same as the running state but also has associated checkpoint +// information that maybe need destroyed when the container is stopped and destroy is called. +type restoredState struct { + imageDir string + c *linuxContainer +} + +func (r *restoredState) status() Status { + return Running +} + +func (r *restoredState) transition(s containerState) error { + switch s.(type) { + case *stoppedState, *runningState: + return nil + } + return newStateTransitionError(r, s) +} + +func (r *restoredState) destroy() error { + if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil { + if !os.IsNotExist(err) { + return err + } + } + return destroy(r.c) +} + +// loadedState is used whenever a container is restored, loaded, or setting additional +// processes inside and it should not be destroyed when it is exiting. +type loadedState struct { + c *linuxContainer + s Status +} + +func (n *loadedState) status() Status { + return n.s +} + +func (n *loadedState) transition(s containerState) error { + n.c.state = s + return nil +} + +func (n *loadedState) destroy() error { + if err := n.c.refreshState(); err != nil { + return err + } + return n.c.state.destroy() +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/stats.go new file mode 100644 index 000000000..303e4b94c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stats.go @@ -0,0 +1,15 @@ +package libcontainer + +type NetworkInterface struct { + // Name is the name of the network interface. + Name string + + RxBytes uint64 + RxPackets uint64 + RxErrors uint64 + RxDropped uint64 + TxBytes uint64 + TxPackets uint64 + TxErrors uint64 + TxDropped uint64 +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go new file mode 100644 index 000000000..f8d1d689c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go @@ -0,0 +1,5 @@ +package libcontainer + +type Stats struct { + Interfaces []*NetworkInterface +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go new file mode 100644 index 000000000..c629dc67d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go @@ -0,0 +1,8 @@ +package libcontainer + +import "github.com/opencontainers/runc/libcontainer/cgroups" + +type Stats struct { + Interfaces []*NetworkInterface + CgroupStats *cgroups.Stats +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go new file mode 100644 index 000000000..da78c1c2e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go @@ -0,0 +1,7 @@ +package libcontainer + +// Solaris - TODO + +type Stats struct { + Interfaces []*NetworkInterface +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go new file mode 100644 index 000000000..f8d1d689c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go @@ -0,0 +1,5 @@ +package libcontainer + +type Stats struct { + Interfaces []*NetworkInterface +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/sync.go b/vendor/github.com/opencontainers/runc/libcontainer/sync.go new file mode 100644 index 000000000..cf7b45bc3 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/sync.go @@ -0,0 +1,107 @@ +package libcontainer + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/opencontainers/runc/libcontainer/utils" +) + +type syncType string + +// Constants that are used for synchronisation between the parent and child +// during container setup. They come in pairs (with procError being a generic +// response which is followed by a &genericError). +// +// [ child ] <-> [ parent ] +// +// procHooks --> [run hooks] +// <-- procResume +// +// procConsole --> +// <-- procConsoleReq +// [send(fd)] --> [recv(fd)] +// <-- procConsoleAck +// +// procReady --> [final setup] +// <-- procRun +const ( + procError syncType = "procError" + procReady syncType = "procReady" + procRun syncType = "procRun" + procHooks syncType = "procHooks" + procResume syncType = "procResume" +) + +type syncT struct { + Type syncType `json:"type"` +} + +// writeSync is used to write to a synchronisation pipe. An error is returned +// if there was a problem writing the payload. +func writeSync(pipe io.Writer, sync syncType) error { + if err := utils.WriteJSON(pipe, syncT{sync}); err != nil { + return err + } + return nil +} + +// readSync is used to read from a synchronisation pipe. An error is returned +// if we got a genericError, the pipe was closed, or we got an unexpected flag. +func readSync(pipe io.Reader, expected syncType) error { + var procSync syncT + if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { + if err == io.EOF { + return fmt.Errorf("parent closed synchronisation channel") + } + + if procSync.Type == procError { + var ierr genericError + + if err := json.NewDecoder(pipe).Decode(&ierr); err != nil { + return fmt.Errorf("failed reading error from parent: %v", err) + } + + return &ierr + } + + if procSync.Type != expected { + return fmt.Errorf("invalid synchronisation flag from parent") + } + } + return nil +} + +// parseSync runs the given callback function on each syncT received from the +// child. It will return once io.EOF is returned from the given pipe. +func parseSync(pipe io.Reader, fn func(*syncT) error) error { + dec := json.NewDecoder(pipe) + for { + var sync syncT + if err := dec.Decode(&sync); err != nil { + if err == io.EOF { + break + } + return err + } + + // We handle this case outside fn for cleanliness reasons. + var ierr *genericError + if sync.Type == procError { + if err := dec.Decode(&ierr); err != nil && err != io.EOF { + return newSystemErrorWithCause(err, "decoding proc error from init") + } + if ierr != nil { + return ierr + } + // Programmer error. + panic("No error following JSON procError payload.") + } + + if err := fn(&sync); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go new file mode 100644 index 000000000..4837085a7 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go @@ -0,0 +1,136 @@ +// +build linux + +package system + +import ( + "bufio" + "fmt" + "os" + "os/exec" + "syscall" // only for exec + "unsafe" + + "golang.org/x/sys/unix" +) + +// If arg2 is nonzero, set the "child subreaper" attribute of the +// calling process; if arg2 is zero, unset the attribute. When a +// process is marked as a child subreaper, all of the children +// that it creates, and their descendants, will be marked as +// having a subreaper. In effect, a subreaper fulfills the role +// of init(1) for its descendant processes. Upon termination of +// a process that is orphaned (i.e., its immediate parent has +// already terminated) and marked as having a subreaper, the +// nearest still living ancestor subreaper will receive a SIGCHLD +// signal and be able to wait(2) on the process to discover its +// termination status. +const PR_SET_CHILD_SUBREAPER = 36 + +type ParentDeathSignal int + +func (p ParentDeathSignal) Restore() error { + if p == 0 { + return nil + } + current, err := GetParentDeathSignal() + if err != nil { + return err + } + if p == current { + return nil + } + return p.Set() +} + +func (p ParentDeathSignal) Set() error { + return SetParentDeathSignal(uintptr(p)) +} + +func Execv(cmd string, args []string, env []string) error { + name, err := exec.LookPath(cmd) + if err != nil { + return err + } + + return syscall.Exec(name, args, env) +} + +func Prlimit(pid, resource int, limit unix.Rlimit) error { + _, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0) + if err != 0 { + return err + } + return nil +} + +func SetParentDeathSignal(sig uintptr) error { + if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil { + return err + } + return nil +} + +func GetParentDeathSignal() (ParentDeathSignal, error) { + var sig int + if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil { + return -1, err + } + return ParentDeathSignal(sig), nil +} + +func SetKeepCaps() error { + if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil { + return err + } + + return nil +} + +func ClearKeepCaps() error { + if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil { + return err + } + + return nil +} + +func Setctty() error { + if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil { + return err + } + return nil +} + +// RunningInUserNS detects whether we are currently running in a user namespace. +// Copied from github.com/lxc/lxd/shared/util.go +func RunningInUserNS() bool { + file, err := os.Open("/proc/self/uid_map") + if err != nil { + // This kernel-provided file only exists if user namespaces are supported + return false + } + defer file.Close() + + buf := bufio.NewReader(file) + l, _, err := buf.ReadLine() + if err != nil { + return false + } + + line := string(l) + var a, b, c int64 + fmt.Sscanf(line, "%d %d %d", &a, &b, &c) + /* + * We assume we are in the initial user namespace if we have a full + * range - 4294967295 uids starting at uid 0. + */ + if a == 0 && b == 0 && c == 4294967295 { + return false + } + return true +} + +// SetSubreaper sets the value i as the subreaper setting for the calling process +func SetSubreaper(i int) error { + return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go new file mode 100644 index 000000000..79232a437 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go @@ -0,0 +1,113 @@ +package system + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +// State is the status of a process. +type State rune + +const ( // Only values for Linux 3.14 and later are listed here + Dead State = 'X' + DiskSleep State = 'D' + Running State = 'R' + Sleeping State = 'S' + Stopped State = 'T' + TracingStop State = 't' + Zombie State = 'Z' +) + +// String forms of the state from proc(5)'s documentation for +// /proc/[pid]/status' "State" field. +func (s State) String() string { + switch s { + case Dead: + return "dead" + case DiskSleep: + return "disk sleep" + case Running: + return "running" + case Sleeping: + return "sleeping" + case Stopped: + return "stopped" + case TracingStop: + return "tracing stop" + case Zombie: + return "zombie" + default: + return fmt.Sprintf("unknown (%c)", s) + } +} + +// Stat_t represents the information from /proc/[pid]/stat, as +// described in proc(5) with names based on the /proc/[pid]/status +// fields. +type Stat_t struct { + // PID is the process ID. + PID uint + + // Name is the command run by the process. + Name string + + // State is the state of the process. + State State + + // StartTime is the number of clock ticks after system boot (since + // Linux 2.6). + StartTime uint64 +} + +// Stat returns a Stat_t instance for the specified process. +func Stat(pid int) (stat Stat_t, err error) { + bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) + if err != nil { + return stat, err + } + return parseStat(string(bytes)) +} + +// GetProcessStartTime is deprecated. Use Stat(pid) and +// Stat_t.StartTime instead. +func GetProcessStartTime(pid int) (string, error) { + stat, err := Stat(pid) + if err != nil { + return "", err + } + return fmt.Sprintf("%d", stat.StartTime), nil +} + +func parseStat(data string) (stat Stat_t, err error) { + // From proc(5), field 2 could contain space and is inside `(` and `)`. + // The following is an example: + // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + i := strings.LastIndex(data, ")") + if i <= 2 || i >= len(data)-1 { + return stat, fmt.Errorf("invalid stat data: %q", data) + } + + parts := strings.SplitN(data[:i], "(", 2) + if len(parts) != 2 { + return stat, fmt.Errorf("invalid stat data: %q", data) + } + + stat.Name = parts[1] + _, err = fmt.Sscanf(parts[0], "%d", &stat.PID) + if err != nil { + return stat, err + } + + // parts indexes should be offset by 3 from the field number given + // proc(5), because parts is zero-indexed and we've removed fields + // one (PID) and two (Name) in the paren-split. + parts = strings.Split(data[i+2:], " ") + var state int + fmt.Sscanf(parts[3-3], "%c", &state) + stat.State = State(state) + fmt.Sscanf(parts[22-3], "%d", &stat.StartTime) + return stat, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go new file mode 100644 index 000000000..3f7235ed1 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go @@ -0,0 +1,25 @@ +// +build linux,386 + +package system + +import ( + "golang.org/x/sys/unix" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go new file mode 100644 index 000000000..d7891a2ff --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go @@ -0,0 +1,25 @@ +// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x + +package system + +import ( + "golang.org/x/sys/unix" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go new file mode 100644 index 000000000..31ff3deb1 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go @@ -0,0 +1,25 @@ +// +build linux,arm + +package system + +import ( + "golang.org/x/sys/unix" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go new file mode 100644 index 000000000..b3a07cba3 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go @@ -0,0 +1,12 @@ +// +build cgo,linux cgo,freebsd + +package system + +/* +#include <unistd.h> +*/ +import "C" + +func GetClockTicks() int { + return int(C.sysconf(C._SC_CLK_TCK)) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go new file mode 100644 index 000000000..d93b5d5fd --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go @@ -0,0 +1,15 @@ +// +build !cgo windows + +package system + +func GetClockTicks() int { + // TODO figure out a better alternative for platforms where we're missing cgo + // + // TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency(). + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx + // + // An example of its usage can be found here. + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx + + return 100 +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go new file mode 100644 index 000000000..e7cfd62b2 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go @@ -0,0 +1,9 @@ +// +build !linux + +package system + +// RunningInUserNS is a stub for non-Linux systems +// Always returns false +func RunningInUserNS() bool { + return false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go new file mode 100644 index 000000000..a6823fc99 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go @@ -0,0 +1,35 @@ +package system + +import "golang.org/x/sys/unix" + +// Returns a []byte slice if the xattr is set and nil otherwise +// Requires path and its attribute as arguments +func Lgetxattr(path string, attr string) ([]byte, error) { + var sz int + // Start with a 128 length byte array + dest := make([]byte, 128) + sz, errno := unix.Lgetxattr(path, attr, dest) + + switch { + case errno == unix.ENODATA: + return nil, errno + case errno == unix.ENOTSUP: + return nil, errno + case errno == unix.ERANGE: + // 128 byte array might just not be good enough, + // A dummy buffer is used to get the real size + // of the xattrs on disk + sz, errno = unix.Lgetxattr(path, attr, []byte{}) + if errno != nil { + return nil, errno + } + dest = make([]byte, sz) + sz, errno = unix.Lgetxattr(path, attr, dest) + if errno != nil { + return nil, errno + } + case errno != nil: + return nil, errno + } + return dest[:sz], nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go new file mode 100644 index 000000000..95e9eebc0 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go @@ -0,0 +1,95 @@ +package user + +import ( + "errors" +) + +var ( + // The current operating system does not provide the required data for user lookups. + ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data") + // No matching entries found in file. + ErrNoPasswdEntries = errors.New("no matching entries in passwd file") + ErrNoGroupEntries = errors.New("no matching entries in group file") +) + +func lookupUser(filter func(u User) bool) (User, error) { + // Get operating system-specific passwd reader-closer. + passwd, err := GetPasswd() + if err != nil { + return User{}, err + } + defer passwd.Close() + + // Get the users. + users, err := ParsePasswdFilter(passwd, filter) + if err != nil { + return User{}, err + } + + // No user entries found. + if len(users) == 0 { + return User{}, ErrNoPasswdEntries + } + + // Assume the first entry is the "correct" one. + return users[0], nil +} + +// LookupUser looks up a user by their username in /etc/passwd. If the user +// cannot be found (or there is no /etc/passwd file on the filesystem), then +// LookupUser returns an error. +func LookupUser(username string) (User, error) { + return lookupUser(func(u User) bool { + return u.Name == username + }) +} + +// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot +// be found (or there is no /etc/passwd file on the filesystem), then LookupId +// returns an error. +func LookupUid(uid int) (User, error) { + return lookupUser(func(u User) bool { + return u.Uid == uid + }) +} + +func lookupGroup(filter func(g Group) bool) (Group, error) { + // Get operating system-specific group reader-closer. + group, err := GetGroup() + if err != nil { + return Group{}, err + } + defer group.Close() + + // Get the users. + groups, err := ParseGroupFilter(group, filter) + if err != nil { + return Group{}, err + } + + // No user entries found. + if len(groups) == 0 { + return Group{}, ErrNoGroupEntries + } + + // Assume the first entry is the "correct" one. + return groups[0], nil +} + +// LookupGroup looks up a group by its name in /etc/group. If the group cannot +// be found (or there is no /etc/group file on the filesystem), then LookupGroup +// returns an error. +func LookupGroup(groupname string) (Group, error) { + return lookupGroup(func(g Group) bool { + return g.Name == groupname + }) +} + +// LookupGid looks up a group by its group id in /etc/group. If the group cannot +// be found (or there is no /etc/group file on the filesystem), then LookupGid +// returns an error. +func LookupGid(gid int) (Group, error) { + return lookupGroup(func(g Group) bool { + return g.Gid == gid + }) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go new file mode 100644 index 000000000..c2bb9ec90 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go @@ -0,0 +1,46 @@ +// +build darwin dragonfly freebsd linux netbsd openbsd solaris + +package user + +import ( + "io" + "os" + + "golang.org/x/sys/unix" +) + +// Unix-specific path to the passwd and group formatted files. +const ( + unixPasswdPath = "/etc/passwd" + unixGroupPath = "/etc/group" +) + +func GetPasswdPath() (string, error) { + return unixPasswdPath, nil +} + +func GetPasswd() (io.ReadCloser, error) { + return os.Open(unixPasswdPath) +} + +func GetGroupPath() (string, error) { + return unixGroupPath, nil +} + +func GetGroup() (io.ReadCloser, error) { + return os.Open(unixGroupPath) +} + +// CurrentUser looks up the current user by their user id in /etc/passwd. If the +// user cannot be found (or there is no /etc/passwd file on the filesystem), +// then CurrentUser returns an error. +func CurrentUser() (User, error) { + return LookupUid(unix.Getuid()) +} + +// CurrentGroup looks up the current user's group by their primary group id's +// entry in /etc/passwd. If the group cannot be found (or there is no +// /etc/group file on the filesystem), then CurrentGroup returns an error. +func CurrentGroup() (Group, error) { + return LookupGid(unix.Getgid()) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go new file mode 100644 index 000000000..4a8d00acb --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go @@ -0,0 +1,38 @@ +// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris + +package user + +import ( + "io" + "syscall" +) + +func GetPasswdPath() (string, error) { + return "", ErrUnsupported +} + +func GetPasswd() (io.ReadCloser, error) { + return nil, ErrUnsupported +} + +func GetGroupPath() (string, error) { + return "", ErrUnsupported +} + +func GetGroup() (io.ReadCloser, error) { + return nil, ErrUnsupported +} + +// CurrentUser looks up the current user by their user id in /etc/passwd. If the +// user cannot be found (or there is no /etc/passwd file on the filesystem), +// then CurrentUser returns an error. +func CurrentUser() (User, error) { + return LookupUid(syscall.Getuid()) +} + +// CurrentGroup looks up the current user's group by their primary group id's +// entry in /etc/passwd. If the group cannot be found (or there is no +// /etc/group file on the filesystem), then CurrentGroup returns an error. +func CurrentGroup() (Group, error) { + return LookupGid(syscall.Getgid()) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go new file mode 100644 index 000000000..8962cab33 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go @@ -0,0 +1,441 @@ +package user + +import ( + "bufio" + "fmt" + "io" + "os" + "strconv" + "strings" +) + +const ( + minId = 0 + maxId = 1<<31 - 1 //for 32-bit systems compatibility +) + +var ( + ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId) +) + +type User struct { + Name string + Pass string + Uid int + Gid int + Gecos string + Home string + Shell string +} + +type Group struct { + Name string + Pass string + Gid int + List []string +} + +func parseLine(line string, v ...interface{}) { + if line == "" { + return + } + + parts := strings.Split(line, ":") + for i, p := range parts { + // Ignore cases where we don't have enough fields to populate the arguments. + // Some configuration files like to misbehave. + if len(v) <= i { + break + } + + // Use the type of the argument to figure out how to parse it, scanf() style. + // This is legit. + switch e := v[i].(type) { + case *string: + *e = p + case *int: + // "numbers", with conversion errors ignored because of some misbehaving configuration files. + *e, _ = strconv.Atoi(p) + case *[]string: + // Comma-separated lists. + if p != "" { + *e = strings.Split(p, ",") + } else { + *e = []string{} + } + default: + // Someone goof'd when writing code using this function. Scream so they can hear us. + panic(fmt.Sprintf("parseLine only accepts {*string, *int, *[]string} as arguments! %#v is not a pointer!", e)) + } + } +} + +func ParsePasswdFile(path string) ([]User, error) { + passwd, err := os.Open(path) + if err != nil { + return nil, err + } + defer passwd.Close() + return ParsePasswd(passwd) +} + +func ParsePasswd(passwd io.Reader) ([]User, error) { + return ParsePasswdFilter(passwd, nil) +} + +func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) { + passwd, err := os.Open(path) + if err != nil { + return nil, err + } + defer passwd.Close() + return ParsePasswdFilter(passwd, filter) +} + +func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) { + if r == nil { + return nil, fmt.Errorf("nil source for passwd-formatted data") + } + + var ( + s = bufio.NewScanner(r) + out = []User{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // see: man 5 passwd + // name:password:UID:GID:GECOS:directory:shell + // Name:Pass:Uid:Gid:Gecos:Home:Shell + // root:x:0:0:root:/root:/bin/bash + // adm:x:3:4:adm:/var/adm:/bin/false + p := User{} + parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell) + + if filter == nil || filter(p) { + out = append(out, p) + } + } + + return out, nil +} + +func ParseGroupFile(path string) ([]Group, error) { + group, err := os.Open(path) + if err != nil { + return nil, err + } + + defer group.Close() + return ParseGroup(group) +} + +func ParseGroup(group io.Reader) ([]Group, error) { + return ParseGroupFilter(group, nil) +} + +func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) { + group, err := os.Open(path) + if err != nil { + return nil, err + } + defer group.Close() + return ParseGroupFilter(group, filter) +} + +func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) { + if r == nil { + return nil, fmt.Errorf("nil source for group-formatted data") + } + + var ( + s = bufio.NewScanner(r) + out = []Group{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + text := s.Text() + if text == "" { + continue + } + + // see: man 5 group + // group_name:password:GID:user_list + // Name:Pass:Gid:List + // root:x:0:root + // adm:x:4:root,adm,daemon + p := Group{} + parseLine(text, &p.Name, &p.Pass, &p.Gid, &p.List) + + if filter == nil || filter(p) { + out = append(out, p) + } + } + + return out, nil +} + +type ExecUser struct { + Uid int + Gid int + Sgids []int + Home string +} + +// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the +// given file paths and uses that data as the arguments to GetExecUser. If the +// files cannot be opened for any reason, the error is ignored and a nil +// io.Reader is passed instead. +func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { + var passwd, group io.Reader + + if passwdFile, err := os.Open(passwdPath); err == nil { + passwd = passwdFile + defer passwdFile.Close() + } + + if groupFile, err := os.Open(groupPath); err == nil { + group = groupFile + defer groupFile.Close() + } + + return GetExecUser(userSpec, defaults, passwd, group) +} + +// GetExecUser parses a user specification string (using the passwd and group +// readers as sources for /etc/passwd and /etc/group data, respectively). In +// the case of blank fields or missing data from the sources, the values in +// defaults is used. +// +// GetExecUser will return an error if a user or group literal could not be +// found in any entry in passwd and group respectively. +// +// Examples of valid user specifications are: +// * "" +// * "user" +// * "uid" +// * "user:group" +// * "uid:gid +// * "user:gid" +// * "uid:group" +// +// It should be noted that if you specify a numeric user or group id, they will +// not be evaluated as usernames (only the metadata will be filled). So attempting +// to parse a user with user.Name = "1337" will produce the user with a UID of +// 1337. +func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) { + if defaults == nil { + defaults = new(ExecUser) + } + + // Copy over defaults. + user := &ExecUser{ + Uid: defaults.Uid, + Gid: defaults.Gid, + Sgids: defaults.Sgids, + Home: defaults.Home, + } + + // Sgids slice *cannot* be nil. + if user.Sgids == nil { + user.Sgids = []int{} + } + + // Allow for userArg to have either "user" syntax, or optionally "user:group" syntax + var userArg, groupArg string + parseLine(userSpec, &userArg, &groupArg) + + // Convert userArg and groupArg to be numeric, so we don't have to execute + // Atoi *twice* for each iteration over lines. + uidArg, uidErr := strconv.Atoi(userArg) + gidArg, gidErr := strconv.Atoi(groupArg) + + // Find the matching user. + users, err := ParsePasswdFilter(passwd, func(u User) bool { + if userArg == "" { + // Default to current state of the user. + return u.Uid == user.Uid + } + + if uidErr == nil { + // If the userArg is numeric, always treat it as a UID. + return uidArg == u.Uid + } + + return u.Name == userArg + }) + + // If we can't find the user, we have to bail. + if err != nil && passwd != nil { + if userArg == "" { + userArg = strconv.Itoa(user.Uid) + } + return nil, fmt.Errorf("unable to find user %s: %v", userArg, err) + } + + var matchedUserName string + if len(users) > 0 { + // First match wins, even if there's more than one matching entry. + matchedUserName = users[0].Name + user.Uid = users[0].Uid + user.Gid = users[0].Gid + user.Home = users[0].Home + } else if userArg != "" { + // If we can't find a user with the given username, the only other valid + // option is if it's a numeric username with no associated entry in passwd. + + if uidErr != nil { + // Not numeric. + return nil, fmt.Errorf("unable to find user %s: %v", userArg, ErrNoPasswdEntries) + } + user.Uid = uidArg + + // Must be inside valid uid range. + if user.Uid < minId || user.Uid > maxId { + return nil, ErrRange + } + + // Okay, so it's numeric. We can just roll with this. + } + + // On to the groups. If we matched a username, we need to do this because of + // the supplementary group IDs. + if groupArg != "" || matchedUserName != "" { + groups, err := ParseGroupFilter(group, func(g Group) bool { + // If the group argument isn't explicit, we'll just search for it. + if groupArg == "" { + // Check if user is a member of this group. + for _, u := range g.List { + if u == matchedUserName { + return true + } + } + return false + } + + if gidErr == nil { + // If the groupArg is numeric, always treat it as a GID. + return gidArg == g.Gid + } + + return g.Name == groupArg + }) + if err != nil && group != nil { + return nil, fmt.Errorf("unable to find groups for spec %v: %v", matchedUserName, err) + } + + // Only start modifying user.Gid if it is in explicit form. + if groupArg != "" { + if len(groups) > 0 { + // First match wins, even if there's more than one matching entry. + user.Gid = groups[0].Gid + } else { + // If we can't find a group with the given name, the only other valid + // option is if it's a numeric group name with no associated entry in group. + + if gidErr != nil { + // Not numeric. + return nil, fmt.Errorf("unable to find group %s: %v", groupArg, ErrNoGroupEntries) + } + user.Gid = gidArg + + // Must be inside valid gid range. + if user.Gid < minId || user.Gid > maxId { + return nil, ErrRange + } + + // Okay, so it's numeric. We can just roll with this. + } + } else if len(groups) > 0 { + // Supplementary group ids only make sense if in the implicit form. + user.Sgids = make([]int, len(groups)) + for i, group := range groups { + user.Sgids[i] = group.Gid + } + } + } + + return user, nil +} + +// GetAdditionalGroups looks up a list of groups by name or group id +// against the given /etc/group formatted data. If a group name cannot +// be found, an error will be returned. If a group id cannot be found, +// or the given group data is nil, the id will be returned as-is +// provided it is in the legal range. +func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) { + var groups = []Group{} + if group != nil { + var err error + groups, err = ParseGroupFilter(group, func(g Group) bool { + for _, ag := range additionalGroups { + if g.Name == ag || strconv.Itoa(g.Gid) == ag { + return true + } + } + return false + }) + if err != nil { + return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err) + } + } + + gidMap := make(map[int]struct{}) + for _, ag := range additionalGroups { + var found bool + for _, g := range groups { + // if we found a matched group either by name or gid, take the + // first matched as correct + if g.Name == ag || strconv.Itoa(g.Gid) == ag { + if _, ok := gidMap[g.Gid]; !ok { + gidMap[g.Gid] = struct{}{} + found = true + break + } + } + } + // we asked for a group but didn't find it. let's check to see + // if we wanted a numeric group + if !found { + gid, err := strconv.Atoi(ag) + if err != nil { + return nil, fmt.Errorf("Unable to find group %s", ag) + } + // Ensure gid is inside gid range. + if gid < minId || gid > maxId { + return nil, ErrRange + } + gidMap[gid] = struct{}{} + } + } + gids := []int{} + for gid := range gidMap { + gids = append(gids, gid) + } + return gids, nil +} + +// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups +// that opens the groupPath given and gives it as an argument to +// GetAdditionalGroups. +func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { + var group io.Reader + + if groupFile, err := os.Open(groupPath); err == nil { + group = groupFile + defer groupFile.Close() + } + return GetAdditionalGroups(additionalGroups, group) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go new file mode 100644 index 000000000..2cbb6491a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go @@ -0,0 +1,95 @@ +// +build linux + +package utils + +/* + * Copyright 2016, 2017 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import ( + "fmt" + "os" + + "golang.org/x/sys/unix" +) + +// MaxSendfdLen is the maximum length of the name of a file descriptor being +// sent using SendFd. The name of the file handle returned by RecvFd will never +// be larger than this value. +const MaxNameLen = 4096 + +// oobSpace is the size of the oob slice required to store a single FD. Note +// that unix.UnixRights appears to make the assumption that fd is always int32, +// so sizeof(fd) = 4. +var oobSpace = unix.CmsgSpace(4) + +// RecvFd waits for a file descriptor to be sent over the given AF_UNIX +// socket. The file name of the remote file descriptor will be recreated +// locally (it is sent as non-auxiliary data in the same payload). +func RecvFd(socket *os.File) (*os.File, error) { + // For some reason, unix.Recvmsg uses the length rather than the capacity + // when passing the msg_controllen and other attributes to recvmsg. So we + // have to actually set the length. + name := make([]byte, MaxNameLen) + oob := make([]byte, oobSpace) + + sockfd := socket.Fd() + n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) + if err != nil { + return nil, err + } + + if n >= MaxNameLen || oobn != oobSpace { + return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) + } + + // Truncate. + name = name[:n] + oob = oob[:oobn] + + scms, err := unix.ParseSocketControlMessage(oob) + if err != nil { + return nil, err + } + if len(scms) != 1 { + return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) + } + scm := scms[0] + + fds, err := unix.ParseUnixRights(&scm) + if err != nil { + return nil, err + } + if len(fds) != 1 { + return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds)) + } + fd := uintptr(fds[0]) + + return os.NewFile(fd, string(name)), nil +} + +// SendFd sends a file descriptor over the given AF_UNIX socket. In +// addition, the file.Name() of the given file will also be sent as +// non-auxiliary data in the same payload (allowing to send contextual +// information for a file descriptor). +func SendFd(socket, file *os.File) error { + name := []byte(file.Name()) + if len(name) >= MaxNameLen { + return fmt.Errorf("sendfd: filename too long: %s", file.Name()) + } + oob := unix.UnixRights(int(file.Fd())) + + return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go new file mode 100644 index 000000000..baa54c9ba --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go @@ -0,0 +1,127 @@ +package utils + +import ( + "crypto/rand" + "encoding/hex" + "encoding/json" + "io" + "os" + "path/filepath" + "strings" + "unsafe" + + "golang.org/x/sys/unix" +) + +const ( + exitSignalOffset = 128 +) + +// GenerateRandomName returns a new name joined with a prefix. This size +// specified is used to truncate the randomly generated value +func GenerateRandomName(prefix string, size int) (string, error) { + id := make([]byte, 32) + if _, err := io.ReadFull(rand.Reader, id); err != nil { + return "", err + } + if size > 64 { + size = 64 + } + return prefix + hex.EncodeToString(id)[:size], nil +} + +// ResolveRootfs ensures that the current working directory is +// not a symlink and returns the absolute path to the rootfs +func ResolveRootfs(uncleanRootfs string) (string, error) { + rootfs, err := filepath.Abs(uncleanRootfs) + if err != nil { + return "", err + } + return filepath.EvalSymlinks(rootfs) +} + +// ExitStatus returns the correct exit status for a process based on if it +// was signaled or exited cleanly +func ExitStatus(status unix.WaitStatus) int { + if status.Signaled() { + return exitSignalOffset + int(status.Signal()) + } + return status.ExitStatus() +} + +// WriteJSON writes the provided struct v to w using standard json marshaling +func WriteJSON(w io.Writer, v interface{}) error { + data, err := json.Marshal(v) + if err != nil { + return err + } + _, err = w.Write(data) + return err +} + +// CleanPath makes a path safe for use with filepath.Join. This is done by not +// only cleaning the path, but also (if the path is relative) adding a leading +// '/' and cleaning it (then removing the leading '/'). This ensures that a +// path resulting from prepending another path will always resolve to lexically +// be a subdirectory of the prefixed path. This is all done lexically, so paths +// that include symlinks won't be safe as a result of using CleanPath. +func CleanPath(path string) string { + // Deal with empty strings nicely. + if path == "" { + return "" + } + + // Ensure that all paths are cleaned (especially problematic ones like + // "/../../../../../" which can cause lots of issues). + path = filepath.Clean(path) + + // If the path isn't absolute, we need to do more processing to fix paths + // such as "../../../../<etc>/some/path". We also shouldn't convert absolute + // paths to relative ones. + if !filepath.IsAbs(path) { + path = filepath.Clean(string(os.PathSeparator) + path) + // This can't fail, as (by definition) all paths are relative to root. + path, _ = filepath.Rel(string(os.PathSeparator), path) + } + + // Clean the path again for good measure. + return filepath.Clean(path) +} + +// SearchLabels searches a list of key-value pairs for the provided key and +// returns the corresponding value. The pairs must be separated with '='. +func SearchLabels(labels []string, query string) string { + for _, l := range labels { + parts := strings.SplitN(l, "=", 2) + if len(parts) < 2 { + continue + } + if parts[0] == query { + return parts[1] + } + } + return "" +} + +// Annotations returns the bundle path and user defined annotations from the +// libcontainer state. We need to remove the bundle because that is a label +// added by libcontainer. +func Annotations(labels []string) (bundle string, userAnnotations map[string]string) { + userAnnotations = make(map[string]string) + for _, l := range labels { + parts := strings.SplitN(l, "=", 2) + if len(parts) < 2 { + continue + } + if parts[0] == "bundle" { + bundle = parts[1] + } else { + userAnnotations[parts[0]] = parts[1] + } + } + return +} + +func GetIntSize() int { + return int(unsafe.Sizeof(1)) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go new file mode 100644 index 000000000..c96088988 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go @@ -0,0 +1,44 @@ +// +build !windows + +package utils + +import ( + "io/ioutil" + "os" + "strconv" + + "golang.org/x/sys/unix" +) + +func CloseExecFrom(minFd int) error { + fdList, err := ioutil.ReadDir("/proc/self/fd") + if err != nil { + return err + } + for _, fi := range fdList { + fd, err := strconv.Atoi(fi.Name()) + if err != nil { + // ignore non-numeric file names + continue + } + + if fd < minFd { + // ignore descriptors lower than our specified minimum + continue + } + + // intentionally ignore errors from unix.CloseOnExec + unix.CloseOnExec(fd) + // the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall) + } + return nil +} + +// NewSockPair returns a new unix socket pair +func NewSockPair(name string) (parent *os.File, child *os.File, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil +} diff --git a/vendor/github.com/opencontainers/runc/vendor.conf b/vendor/github.com/opencontainers/runc/vendor.conf new file mode 100644 index 000000000..9506b5c67 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/vendor.conf @@ -0,0 +1,21 @@ +# OCI runtime-spec. When updating this, make sure you use a version tag rather +# than a commit ID so it's much more obvious what version of the spec we are +# using. +github.com/opencontainers/runtime-spec v1.0.0 +# Core libcontainer functionality. +github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08 +github.com/opencontainers/selinux v1.0.0-rc1 +github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0 +github.com/sirupsen/logrus a3f95b5c423586578a4e099b11a46c2479628cac +github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16 +github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270 +# systemd integration. +github.com/coreos/go-systemd v14 +github.com/coreos/pkg v3 +github.com/godbus/dbus v3 +github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8 +# Command-line interface. +github.com/docker/docker 0f5c9d301b9b1cca66b3ea0f9dec3b5317d3686d +github.com/docker/go-units v0.2.0 +github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e +golang.org/x/sys 0e0164865330d5cf1c00247be08330bf96e2f87c https://github.com/golang/sys diff --git a/vendor/github.com/opencontainers/runtime-spec/LICENSE b/vendor/github.com/opencontainers/runtime-spec/LICENSE new file mode 100644 index 000000000..bdc403653 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2015 The Linux Foundation. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/runtime-spec/README.md b/vendor/github.com/opencontainers/runtime-spec/README.md new file mode 100644 index 000000000..2f7eb6086 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/README.md @@ -0,0 +1,158 @@ +# Open Container Initiative Runtime Specification + +The [Open Container Initiative][oci] develops specifications for standards on Operating System process and application containers. + +The specification can be found [here](spec.md). + +## Table of Contents + +Additional documentation about how this group operates: + +- [Code of Conduct][code-of-conduct] +- [Style and Conventions](style.md) +- [Implementations](implementations.md) +- [Releases](RELEASES.md) +- [project](project.md) +- [charter][charter] + +## Use Cases + +To provide context for users the following section gives example use cases for each part of the spec. + +### Application Bundle Builders + +Application bundle builders can create a [bundle](bundle.md) directory that includes all of the files required for launching an application as a container. +The bundle contains an OCI [configuration file](config.md) where the builder can specify host-independent details such as [which executable to launch](config.md#process) and host-specific settings such as [mount](config.md#mounts) locations, [hook](config.md#hooks) paths, Linux [namespaces](config-linux.md#namespaces) and [cgroups](config-linux.md#control-groups). +Because the configuration includes host-specific settings, application bundle directories copied between two hosts may require configuration adjustments. + +### Hook Developers + +[Hook](config.md#hooks) developers can extend the functionality of an OCI-compliant runtime by hooking into a container's lifecycle with an external application. +Example use cases include sophisticated network configuration, volume garbage collection, etc. + +### Runtime Developers + +Runtime developers can build runtime implementations that run OCI-compliant bundles and container configuration, containing low-level OS and host-specific details, on a particular platform. + +## Contributing + +Development happens on GitHub for the spec. +Issues are used for bugs and actionable items and longer discussions can happen on the [mailing list](#mailing-list). + +The specification and code is licensed under the Apache 2.0 license found in the [LICENSE](./LICENSE) file. + +### Discuss your design + +The project welcomes submissions, but please let everyone know what you are working on. + +Before undertaking a nontrivial change to this specification, send mail to the [mailing list](#mailing-list) to discuss what you plan to do. +This gives everyone a chance to validate the design, helps prevent duplication of effort, and ensures that the idea fits. +It also guarantees that the design is sound before code is written; a GitHub pull-request is not the place for high-level discussions. + +Typos and grammatical errors can go straight to a pull-request. +When in doubt, start on the [mailing-list](#mailing-list). + +### Weekly Call + +The contributors and maintainers of all OCI projects have a weekly meeting on Wednesdays at: + +* 8:00 AM (USA Pacific), during [odd weeks][iso-week]. +* 2:00 PM (USA Pacific), during [even weeks][iso-week]. + +There is an [iCalendar][rfc5545] format for the meetings [here](meeting.ics). + +Everyone is welcome to participate via [UberConference web][uberconference] or audio-only: +1 415 968 0849 (no PIN needed). +An initial agenda will be posted to the [mailing list](#mailing-list) earlier in the week, and everyone is welcome to propose additional topics or suggest other agenda alterations there. +Minutes are posted to the [mailing list](#mailing-list) and minutes from past calls are archived [here][minutes], with minutes from especially old meetings (September 2015 and earlier) archived [here][runtime-wiki]. + +### Mailing List + +You can subscribe and join the mailing list on [Google Groups][dev-list]. + +### IRC + +OCI discussion happens on #opencontainers on Freenode ([logs][irc-logs]). + +### Git commit + +#### Sign your work + +The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open-source patch. +The rules are pretty simple: if you can certify the below (from http://developercertificate.org): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +then you just add a line to every git commit message: + + Signed-off-by: Joe Smith <joe@gmail.com> + +using your real name (sorry, no pseudonyms or anonymous contributions.) + +You can add the sign off when creating the git commit via `git commit -s`. + +#### Commit Style + +Simple house-keeping for clean git history. +Read more on [How to Write a Git Commit Message][how-to-git-commit] or the Discussion section of [git-commit(1)][git-commit.1]. + +1. Separate the subject from body with a blank line +2. Limit the subject line to 50 characters +3. Capitalize the subject line +4. Do not end the subject line with a period +5. Use the imperative mood in the subject line +6. Wrap the body at 72 characters +7. Use the body to explain what and why vs. how + * If there was important/useful/essential conversation or information, copy or include a reference +8. When possible, one keyword to scope the change in the subject (i.e. "README: ...", "runtime: ...") + + +[charter]: https://www.opencontainers.org/about/governance +[code-of-conduct]: https://github.com/opencontainers/tob/blob/master/code-of-conduct.md +[dev-list]: https://groups.google.com/a/opencontainers.org/forum/#!forum/dev +[how-to-git-commit]: http://chris.beams.io/posts/git-commit +[irc-logs]: http://ircbot.wl.linuxfoundation.org/eavesdrop/%23opencontainers/ +[iso-week]: https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_of_a_given_date +[minutes]: http://ircbot.wl.linuxfoundation.org/meetings/opencontainers/ +[oci]: https://www.opencontainers.org +[rfc5545]: https://tools.ietf.org/html/rfc5545 +[runtime-wiki]: https://github.com/opencontainers/runtime-spec/wiki +[uberconference]: https://www.uberconference.com/opencontainers + +[git-commit.1]: http://git-scm.com/docs/git-commit diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go new file mode 100644 index 000000000..f3f37d42d --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go @@ -0,0 +1,570 @@ +package specs + +import "os" + +// Spec is the base configuration for the container. +type Spec struct { + // Version of the Open Container Runtime Specification with which the bundle complies. + Version string `json:"ociVersion"` + // Process configures the container process. + Process *Process `json:"process,omitempty"` + // Root configures the container's root filesystem. + Root *Root `json:"root,omitempty"` + // Hostname configures the container's hostname. + Hostname string `json:"hostname,omitempty"` + // Mounts configures additional mounts (on top of Root). + Mounts []Mount `json:"mounts,omitempty"` + // Hooks configures callbacks for container lifecycle events. + Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"` + // Annotations contains arbitrary metadata for the container. + Annotations map[string]string `json:"annotations,omitempty"` + + // Linux is platform-specific configuration for Linux based containers. + Linux *Linux `json:"linux,omitempty" platform:"linux"` + // Solaris is platform-specific configuration for Solaris based containers. + Solaris *Solaris `json:"solaris,omitempty" platform:"solaris"` + // Windows is platform-specific configuration for Windows based containers. + Windows *Windows `json:"windows,omitempty" platform:"windows"` +} + +// Process contains information to start a specific application inside the container. +type Process struct { + // Terminal creates an interactive terminal for the container. + Terminal bool `json:"terminal,omitempty"` + // ConsoleSize specifies the size of the console. + ConsoleSize *Box `json:"consoleSize,omitempty"` + // User specifies user information for the process. + User User `json:"user"` + // Args specifies the binary and arguments for the application to execute. + Args []string `json:"args"` + // Env populates the process environment for the process. + Env []string `json:"env,omitempty"` + // Cwd is the current working directory for the process and must be + // relative to the container's root. + Cwd string `json:"cwd"` + // Capabilities are Linux capabilities that are kept for the process. + Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"` + // Rlimits specifies rlimit options to apply to the process. + Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"` + // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. + NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"` + // ApparmorProfile specifies the apparmor profile for the container. + ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"` + // Specify an oom_score_adj for the container. + OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"` + // SelinuxLabel specifies the selinux context that the container process is run as. + SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` +} + +// LinuxCapabilities specifies the whitelist of capabilities that are kept for a process. +// http://man7.org/linux/man-pages/man7/capabilities.7.html +type LinuxCapabilities struct { + // Bounding is the set of capabilities checked by the kernel. + Bounding []string `json:"bounding,omitempty" platform:"linux"` + // Effective is the set of capabilities checked by the kernel. + Effective []string `json:"effective,omitempty" platform:"linux"` + // Inheritable is the capabilities preserved across execve. + Inheritable []string `json:"inheritable,omitempty" platform:"linux"` + // Permitted is the limiting superset for effective capabilities. + Permitted []string `json:"permitted,omitempty" platform:"linux"` + // Ambient is the ambient set of capabilities that are kept. + Ambient []string `json:"ambient,omitempty" platform:"linux"` +} + +// Box specifies dimensions of a rectangle. Used for specifying the size of a console. +type Box struct { + // Height is the vertical dimension of a box. + Height uint `json:"height"` + // Width is the horizontal dimension of a box. + Width uint `json:"width"` +} + +// User specifies specific user (and group) information for the container process. +type User struct { + // UID is the user id. + UID uint32 `json:"uid" platform:"linux,solaris"` + // GID is the group id. + GID uint32 `json:"gid" platform:"linux,solaris"` + // AdditionalGids are additional group ids set for the container's process. + AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"` + // Username is the user name. + Username string `json:"username,omitempty" platform:"windows"` +} + +// Root contains information about the container's root filesystem on the host. +type Root struct { + // Path is the absolute path to the container's root filesystem. + Path string `json:"path"` + // Readonly makes the root filesystem for the container readonly before the process is executed. + Readonly bool `json:"readonly,omitempty"` +} + +// Mount specifies a mount for a container. +type Mount struct { + // Destination is the absolute path where the mount will be placed in the container. + Destination string `json:"destination"` + // Type specifies the mount kind. + Type string `json:"type,omitempty" platform:"linux,solaris"` + // Source specifies the source path of the mount. + Source string `json:"source,omitempty"` + // Options are fstab style mount options. + Options []string `json:"options,omitempty"` +} + +// Hook specifies a command that is run at a particular event in the lifecycle of a container +type Hook struct { + Path string `json:"path"` + Args []string `json:"args,omitempty"` + Env []string `json:"env,omitempty"` + Timeout *int `json:"timeout,omitempty"` +} + +// Hooks for container setup and teardown +type Hooks struct { + // Prestart is a list of hooks to be run before the container process is executed. + Prestart []Hook `json:"prestart,omitempty"` + // Poststart is a list of hooks to be run after the container process is started. + Poststart []Hook `json:"poststart,omitempty"` + // Poststop is a list of hooks to be run after the container process exits. + Poststop []Hook `json:"poststop,omitempty"` +} + +// Linux contains platform-specific configuration for Linux based containers. +type Linux struct { + // UIDMapping specifies user mappings for supporting user namespaces. + UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty"` + // GIDMapping specifies group mappings for supporting user namespaces. + GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty"` + // Sysctl are a set of key value pairs that are set for the container on start + Sysctl map[string]string `json:"sysctl,omitempty"` + // Resources contain cgroup information for handling resource constraints + // for the container + Resources *LinuxResources `json:"resources,omitempty"` + // CgroupsPath specifies the path to cgroups that are created and/or joined by the container. + // The path is expected to be relative to the cgroups mountpoint. + // If resources are specified, the cgroups at CgroupsPath will be updated based on resources. + CgroupsPath string `json:"cgroupsPath,omitempty"` + // Namespaces contains the namespaces that are created and/or joined by the container + Namespaces []LinuxNamespace `json:"namespaces,omitempty"` + // Devices are a list of device nodes that are created for the container + Devices []LinuxDevice `json:"devices,omitempty"` + // Seccomp specifies the seccomp security settings for the container. + Seccomp *LinuxSeccomp `json:"seccomp,omitempty"` + // RootfsPropagation is the rootfs mount propagation mode for the container. + RootfsPropagation string `json:"rootfsPropagation,omitempty"` + // MaskedPaths masks over the provided paths inside the container. + MaskedPaths []string `json:"maskedPaths,omitempty"` + // ReadonlyPaths sets the provided paths as RO inside the container. + ReadonlyPaths []string `json:"readonlyPaths,omitempty"` + // MountLabel specifies the selinux context for the mounts in the container. + MountLabel string `json:"mountLabel,omitempty"` + // IntelRdt contains Intel Resource Director Technology (RDT) information + // for handling resource constraints (e.g., L3 cache) for the container + IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` +} + +// LinuxNamespace is the configuration for a Linux namespace +type LinuxNamespace struct { + // Type is the type of namespace + Type LinuxNamespaceType `json:"type"` + // Path is a path to an existing namespace persisted on disk that can be joined + // and is of the same type + Path string `json:"path,omitempty"` +} + +// LinuxNamespaceType is one of the Linux namespaces +type LinuxNamespaceType string + +const ( + // PIDNamespace for isolating process IDs + PIDNamespace LinuxNamespaceType = "pid" + // NetworkNamespace for isolating network devices, stacks, ports, etc + NetworkNamespace = "network" + // MountNamespace for isolating mount points + MountNamespace = "mount" + // IPCNamespace for isolating System V IPC, POSIX message queues + IPCNamespace = "ipc" + // UTSNamespace for isolating hostname and NIS domain name + UTSNamespace = "uts" + // UserNamespace for isolating user and group IDs + UserNamespace = "user" + // CgroupNamespace for isolating cgroup hierarchies + CgroupNamespace = "cgroup" +) + +// LinuxIDMapping specifies UID/GID mappings +type LinuxIDMapping struct { + // HostID is the starting UID/GID on the host to be mapped to 'ContainerID' + HostID uint32 `json:"hostID"` + // ContainerID is the starting UID/GID in the container + ContainerID uint32 `json:"containerID"` + // Size is the number of IDs to be mapped + Size uint32 `json:"size"` +} + +// POSIXRlimit type and restrictions +type POSIXRlimit struct { + // Type of the rlimit to set + Type string `json:"type"` + // Hard is the hard limit for the specified type + Hard uint64 `json:"hard"` + // Soft is the soft limit for the specified type + Soft uint64 `json:"soft"` +} + +// LinuxHugepageLimit structure corresponds to limiting kernel hugepages +type LinuxHugepageLimit struct { + // Pagesize is the hugepage size + Pagesize string `json:"pageSize"` + // Limit is the limit of "hugepagesize" hugetlb usage + Limit uint64 `json:"limit"` +} + +// LinuxInterfacePriority for network interfaces +type LinuxInterfacePriority struct { + // Name is the name of the network interface + Name string `json:"name"` + // Priority for the interface + Priority uint32 `json:"priority"` +} + +// linuxBlockIODevice holds major:minor format supported in blkio cgroup +type linuxBlockIODevice struct { + // Major is the device's major number. + Major int64 `json:"major"` + // Minor is the device's minor number. + Minor int64 `json:"minor"` +} + +// LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice +type LinuxWeightDevice struct { + linuxBlockIODevice + // Weight is the bandwidth rate for the device. + Weight *uint16 `json:"weight,omitempty"` + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only + LeafWeight *uint16 `json:"leafWeight,omitempty"` +} + +// LinuxThrottleDevice struct holds a `major:minor rate_per_second` pair +type LinuxThrottleDevice struct { + linuxBlockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// LinuxBlockIO for Linux cgroup 'blkio' resource management +type LinuxBlockIO struct { + // Specifies per cgroup weight + Weight *uint16 `json:"weight,omitempty"` + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, CFQ scheduler only + LeafWeight *uint16 `json:"leafWeight,omitempty"` + // Weight per cgroup per device, can override BlkioWeight + WeightDevice []LinuxWeightDevice `json:"weightDevice,omitempty"` + // IO read rate limit per cgroup per device, bytes per second + ThrottleReadBpsDevice []LinuxThrottleDevice `json:"throttleReadBpsDevice,omitempty"` + // IO write rate limit per cgroup per device, bytes per second + ThrottleWriteBpsDevice []LinuxThrottleDevice `json:"throttleWriteBpsDevice,omitempty"` + // IO read rate limit per cgroup per device, IO per second + ThrottleReadIOPSDevice []LinuxThrottleDevice `json:"throttleReadIOPSDevice,omitempty"` + // IO write rate limit per cgroup per device, IO per second + ThrottleWriteIOPSDevice []LinuxThrottleDevice `json:"throttleWriteIOPSDevice,omitempty"` +} + +// LinuxMemory for Linux cgroup 'memory' resource management +type LinuxMemory struct { + // Memory limit (in bytes). + Limit *int64 `json:"limit,omitempty"` + // Memory reservation or soft_limit (in bytes). + Reservation *int64 `json:"reservation,omitempty"` + // Total memory limit (memory + swap). + Swap *int64 `json:"swap,omitempty"` + // Kernel memory limit (in bytes). + Kernel *int64 `json:"kernel,omitempty"` + // Kernel memory limit for tcp (in bytes) + KernelTCP *int64 `json:"kernelTCP,omitempty"` + // How aggressive the kernel will swap memory pages. + Swappiness *uint64 `json:"swappiness,omitempty"` + // DisableOOMKiller disables the OOM killer for out of memory conditions + DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"` +} + +// LinuxCPU for Linux cgroup 'cpu' resource management +type LinuxCPU struct { + // CPU shares (relative weight (ratio) vs. other cgroups with cpu shares). + Shares *uint64 `json:"shares,omitempty"` + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + Quota *int64 `json:"quota,omitempty"` + // CPU period to be used for hardcapping (in usecs). + Period *uint64 `json:"period,omitempty"` + // How much time realtime scheduling may use (in usecs). + RealtimeRuntime *int64 `json:"realtimeRuntime,omitempty"` + // CPU period to be used for realtime scheduling (in usecs). + RealtimePeriod *uint64 `json:"realtimePeriod,omitempty"` + // CPUs to use within the cpuset. Default is to use any CPU available. + Cpus string `json:"cpus,omitempty"` + // List of memory nodes in the cpuset. Default is to use any available memory node. + Mems string `json:"mems,omitempty"` +} + +// LinuxPids for Linux cgroup 'pids' resource management (Linux 4.3) +type LinuxPids struct { + // Maximum number of PIDs. Default is "no limit". + Limit int64 `json:"limit"` +} + +// LinuxNetwork identification and priority configuration +type LinuxNetwork struct { + // Set class identifier for container's network packets + ClassID *uint32 `json:"classID,omitempty"` + // Set priority of network traffic for container + Priorities []LinuxInterfacePriority `json:"priorities,omitempty"` +} + +// LinuxResources has container runtime resource constraints +type LinuxResources struct { + // Devices configures the device whitelist. + Devices []LinuxDeviceCgroup `json:"devices,omitempty"` + // Memory restriction configuration + Memory *LinuxMemory `json:"memory,omitempty"` + // CPU resource restriction configuration + CPU *LinuxCPU `json:"cpu,omitempty"` + // Task resource restriction configuration. + Pids *LinuxPids `json:"pids,omitempty"` + // BlockIO restriction configuration + BlockIO *LinuxBlockIO `json:"blockIO,omitempty"` + // Hugetlb limit (in bytes) + HugepageLimits []LinuxHugepageLimit `json:"hugepageLimits,omitempty"` + // Network restriction configuration + Network *LinuxNetwork `json:"network,omitempty"` +} + +// LinuxDevice represents the mknod information for a Linux special device file +type LinuxDevice struct { + // Path to the device. + Path string `json:"path"` + // Device type, block, char, etc. + Type string `json:"type"` + // Major is the device's major number. + Major int64 `json:"major"` + // Minor is the device's minor number. + Minor int64 `json:"minor"` + // FileMode permission bits for the device. + FileMode *os.FileMode `json:"fileMode,omitempty"` + // UID of the device. + UID *uint32 `json:"uid,omitempty"` + // Gid of the device. + GID *uint32 `json:"gid,omitempty"` +} + +// LinuxDeviceCgroup represents a device rule for the whitelist controller +type LinuxDeviceCgroup struct { + // Allow or deny + Allow bool `json:"allow"` + // Device type, block, char, etc. + Type string `json:"type,omitempty"` + // Major is the device's major number. + Major *int64 `json:"major,omitempty"` + // Minor is the device's minor number. + Minor *int64 `json:"minor,omitempty"` + // Cgroup access permissions format, rwm. + Access string `json:"access,omitempty"` +} + +// Solaris contains platform-specific configuration for Solaris application containers. +type Solaris struct { + // SMF FMRI which should go "online" before we start the container process. + Milestone string `json:"milestone,omitempty"` + // Maximum set of privileges any process in this container can obtain. + LimitPriv string `json:"limitpriv,omitempty"` + // The maximum amount of shared memory allowed for this container. + MaxShmMemory string `json:"maxShmMemory,omitempty"` + // Specification for automatic creation of network resources for this container. + Anet []SolarisAnet `json:"anet,omitempty"` + // Set limit on the amount of CPU time that can be used by container. + CappedCPU *SolarisCappedCPU `json:"cappedCPU,omitempty"` + // The physical and swap caps on the memory that can be used by this container. + CappedMemory *SolarisCappedMemory `json:"cappedMemory,omitempty"` +} + +// SolarisCappedCPU allows users to set limit on the amount of CPU time that can be used by container. +type SolarisCappedCPU struct { + Ncpus string `json:"ncpus,omitempty"` +} + +// SolarisCappedMemory allows users to set the physical and swap caps on the memory that can be used by this container. +type SolarisCappedMemory struct { + Physical string `json:"physical,omitempty"` + Swap string `json:"swap,omitempty"` +} + +// SolarisAnet provides the specification for automatic creation of network resources for this container. +type SolarisAnet struct { + // Specify a name for the automatically created VNIC datalink. + Linkname string `json:"linkname,omitempty"` + // Specify the link over which the VNIC will be created. + Lowerlink string `json:"lowerLink,omitempty"` + // The set of IP addresses that the container can use. + Allowedaddr string `json:"allowedAddress,omitempty"` + // Specifies whether allowedAddress limitation is to be applied to the VNIC. + Configallowedaddr string `json:"configureAllowedAddress,omitempty"` + // The value of the optional default router. + Defrouter string `json:"defrouter,omitempty"` + // Enable one or more types of link protection. + Linkprotection string `json:"linkProtection,omitempty"` + // Set the VNIC's macAddress + Macaddress string `json:"macAddress,omitempty"` +} + +// Windows defines the runtime configuration for Windows based containers, including Hyper-V containers. +type Windows struct { + // LayerFolders contains a list of absolute paths to directories containing image layers. + LayerFolders []string `json:"layerFolders"` + // Resources contains information for handling resource constraints for the container. + Resources *WindowsResources `json:"resources,omitempty"` + // CredentialSpec contains a JSON object describing a group Managed Service Account (gMSA) specification. + CredentialSpec interface{} `json:"credentialSpec,omitempty"` + // Servicing indicates if the container is being started in a mode to apply a Windows Update servicing operation. + Servicing bool `json:"servicing,omitempty"` + // IgnoreFlushesDuringBoot indicates if the container is being started in a mode where disk writes are not flushed during its boot process. + IgnoreFlushesDuringBoot bool `json:"ignoreFlushesDuringBoot,omitempty"` + // HyperV contains information for running a container with Hyper-V isolation. + HyperV *WindowsHyperV `json:"hyperv,omitempty"` + // Network restriction configuration. + Network *WindowsNetwork `json:"network,omitempty"` +} + +// WindowsResources has container runtime resource constraints for containers running on Windows. +type WindowsResources struct { + // Memory restriction configuration. + Memory *WindowsMemoryResources `json:"memory,omitempty"` + // CPU resource restriction configuration. + CPU *WindowsCPUResources `json:"cpu,omitempty"` + // Storage restriction configuration. + Storage *WindowsStorageResources `json:"storage,omitempty"` +} + +// WindowsMemoryResources contains memory resource management settings. +type WindowsMemoryResources struct { + // Memory limit in bytes. + Limit *uint64 `json:"limit,omitempty"` +} + +// WindowsCPUResources contains CPU resource management settings. +type WindowsCPUResources struct { + // Number of CPUs available to the container. + Count *uint64 `json:"count,omitempty"` + // CPU shares (relative weight to other containers with cpu shares). + Shares *uint16 `json:"shares,omitempty"` + // Specifies the portion of processor cycles that this container can use as a percentage times 100. + Maximum *uint16 `json:"maximum,omitempty"` +} + +// WindowsStorageResources contains storage resource management settings. +type WindowsStorageResources struct { + // Specifies maximum Iops for the system drive. + Iops *uint64 `json:"iops,omitempty"` + // Specifies maximum bytes per second for the system drive. + Bps *uint64 `json:"bps,omitempty"` + // Sandbox size specifies the minimum size of the system drive in bytes. + SandboxSize *uint64 `json:"sandboxSize,omitempty"` +} + +// WindowsNetwork contains network settings for Windows containers. +type WindowsNetwork struct { + // List of HNS endpoints that the container should connect to. + EndpointList []string `json:"endpointList,omitempty"` + // Specifies if unqualified DNS name resolution is allowed. + AllowUnqualifiedDNSQuery bool `json:"allowUnqualifiedDNSQuery,omitempty"` + // Comma separated list of DNS suffixes to use for name resolution. + DNSSearchList []string `json:"DNSSearchList,omitempty"` + // Name (ID) of the container that we will share with the network stack. + NetworkSharedContainerName string `json:"networkSharedContainerName,omitempty"` +} + +// WindowsHyperV contains information for configuring a container to run with Hyper-V isolation. +type WindowsHyperV struct { + // UtilityVMPath is an optional path to the image used for the Utility VM. + UtilityVMPath string `json:"utilityVMPath,omitempty"` +} + +// LinuxSeccomp represents syscall restrictions +type LinuxSeccomp struct { + DefaultAction LinuxSeccompAction `json:"defaultAction"` + Architectures []Arch `json:"architectures,omitempty"` + Syscalls []LinuxSyscall `json:"syscalls,omitempty"` +} + +// Arch used for additional architectures +type Arch string + +// Additional architectures permitted to be used for system calls +// By default only the native architecture of the kernel is permitted +const ( + ArchX86 Arch = "SCMP_ARCH_X86" + ArchX86_64 Arch = "SCMP_ARCH_X86_64" + ArchX32 Arch = "SCMP_ARCH_X32" + ArchARM Arch = "SCMP_ARCH_ARM" + ArchAARCH64 Arch = "SCMP_ARCH_AARCH64" + ArchMIPS Arch = "SCMP_ARCH_MIPS" + ArchMIPS64 Arch = "SCMP_ARCH_MIPS64" + ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32" + ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL" + ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64" + ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32" + ArchPPC Arch = "SCMP_ARCH_PPC" + ArchPPC64 Arch = "SCMP_ARCH_PPC64" + ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE" + ArchS390 Arch = "SCMP_ARCH_S390" + ArchS390X Arch = "SCMP_ARCH_S390X" + ArchPARISC Arch = "SCMP_ARCH_PARISC" + ArchPARISC64 Arch = "SCMP_ARCH_PARISC64" +) + +// LinuxSeccompAction taken upon Seccomp rule match +type LinuxSeccompAction string + +// Define actions for Seccomp rules +const ( + ActKill LinuxSeccompAction = "SCMP_ACT_KILL" + ActTrap LinuxSeccompAction = "SCMP_ACT_TRAP" + ActErrno LinuxSeccompAction = "SCMP_ACT_ERRNO" + ActTrace LinuxSeccompAction = "SCMP_ACT_TRACE" + ActAllow LinuxSeccompAction = "SCMP_ACT_ALLOW" +) + +// LinuxSeccompOperator used to match syscall arguments in Seccomp +type LinuxSeccompOperator string + +// Define operators for syscall arguments in Seccomp +const ( + OpNotEqual LinuxSeccompOperator = "SCMP_CMP_NE" + OpLessThan LinuxSeccompOperator = "SCMP_CMP_LT" + OpLessEqual LinuxSeccompOperator = "SCMP_CMP_LE" + OpEqualTo LinuxSeccompOperator = "SCMP_CMP_EQ" + OpGreaterEqual LinuxSeccompOperator = "SCMP_CMP_GE" + OpGreaterThan LinuxSeccompOperator = "SCMP_CMP_GT" + OpMaskedEqual LinuxSeccompOperator = "SCMP_CMP_MASKED_EQ" +) + +// LinuxSeccompArg used for matching specific syscall arguments in Seccomp +type LinuxSeccompArg struct { + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"valueTwo,omitempty"` + Op LinuxSeccompOperator `json:"op"` +} + +// LinuxSyscall is used to match a syscall in Seccomp +type LinuxSyscall struct { + Names []string `json:"names"` + Action LinuxSeccompAction `json:"action"` + Args []LinuxSeccompArg `json:"args,omitempty"` +} + +// LinuxIntelRdt has container runtime resource constraints +// for Intel RDT/CAT which introduced in Linux 4.10 kernel +type LinuxIntelRdt struct { + // The schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." + L3CacheSchema string `json:"l3CacheSchema,omitempty"` +} diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/state.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/state.go new file mode 100644 index 000000000..89dce34be --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/state.go @@ -0,0 +1,17 @@ +package specs + +// State holds information about the runtime state of the container. +type State struct { + // Version is the version of the specification that is supported. + Version string `json:"ociVersion"` + // ID is the container ID + ID string `json:"id"` + // Status is the runtime status of the container. + Status string `json:"status"` + // Pid is the process ID for the container process. + Pid int `json:"pid,omitempty"` + // Bundle is the path to the container's bundle directory. + Bundle string `json:"bundle"` + // Annotations are key values associated with the container. + Annotations map[string]string `json:"annotations,omitempty"` +} diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go new file mode 100644 index 000000000..926ce6650 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go @@ -0,0 +1,18 @@ +package specs + +import "fmt" + +const ( + // VersionMajor is for an API incompatible changes + VersionMajor = 1 + // VersionMinor is for functionality in a backwards-compatible manner + VersionMinor = 0 + // VersionPatch is for backwards-compatible bug fixes + VersionPatch = 0 + + // VersionDev indicates development branch. Releases will be empty string. + VersionDev = "" +) + +// Version is the specification version that the package types support. +var Version = fmt.Sprintf("%d.%d.%d%s", VersionMajor, VersionMinor, VersionPatch, VersionDev) diff --git a/vendor/github.com/opencontainers/runtime-tools/LICENSE b/vendor/github.com/opencontainers/runtime-tools/LICENSE new file mode 100644 index 000000000..bdc403653 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2015 The Linux Foundation. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/runtime-tools/README.md b/vendor/github.com/opencontainers/runtime-tools/README.md new file mode 100644 index 000000000..bbcafc26e --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/README.md @@ -0,0 +1,84 @@ +# oci-runtime-tool [![Build Status](https://travis-ci.org/opencontainers/runtime-tools.svg?branch=master)](https://travis-ci.org/opencontainers/runtime-tools) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runtime-tools)](https://goreportcard.com/report/github.com/opencontainers/runtime-tools) + +oci-runtime-tool is a collection of tools for working with the [OCI runtime specification][runtime-spec]. +To build from source code, runtime-tools requires Go 1.7.x or above. + +## Generating an OCI runtime spec configuration files + +[`oci-runtime-tool generate`][generate.1] generates [configuration JSON][config.json] for an [OCI bundle][bundle]. +[OCI-compatible runtimes][runtime-spec] like [runC][] expect to read the configuration from `config.json`. + +```sh +$ oci-runtime-tool generate --output config.json +$ cat config.json +{ + "ociVersion": "0.5.0", + … +} +``` + +## Validating an OCI bundle + +[`oci-runtime-tool validate`][validate.1] validates an OCI bundle. +The error message will be printed if the OCI bundle failed the validation procedure. + +```sh +$ oci-runtime-tool generate +$ oci-runtime-tool validate +INFO[0000] Bundle validation succeeded. +``` + +## Testing OCI runtimes + +```sh +$ sudo make RUNTIME=runc localvalidation +RUNTIME=runc go test -tags "" -v github.com/opencontainers/runtime-tools/validation +=== RUN TestValidateBasic +TAP version 13 +ok 1 - root filesystem +ok 2 - hostname +ok 3 - mounts +ok 4 - capabilities +ok 5 - default symlinks +ok 6 - default devices +ok 7 - linux devices +ok 8 - linux process +ok 9 - masked paths +ok 10 - oom score adj +ok 11 - read only paths +ok 12 - rlimits +ok 13 - sysctls +ok 14 - uid mappings +ok 15 - gid mappings +1..15 +--- PASS: TestValidateBasic (0.08s) +=== RUN TestValidateSysctls +TAP version 13 +ok 1 - root filesystem +ok 2 - hostname +ok 3 - mounts +ok 4 - capabilities +ok 5 - default symlinks +ok 6 - default devices +ok 7 - linux devices +ok 8 - linux process +ok 9 - masked paths +ok 10 - oom score adj +ok 11 - read only paths +ok 12 - rlimits +ok 13 - sysctls +ok 14 - uid mappings +ok 15 - gid mappings +1..15 +--- PASS: TestValidateSysctls (0.20s) +PASS +ok github.com/opencontainers/runtime-tools/validation 0.281s +``` + +[bundle]: https://github.com/opencontainers/runtime-spec/blob/master/bundle.md +[config.json]: https://github.com/opencontainers/runtime-spec/blob/master/config.md +[runC]: https://github.com/opencontainers/runc +[runtime-spec]: https://github.com/opencontainers/runtime-spec + +[generate.1]: man/oci-runtime-tool-generate.1.md +[validate.1]: man/oci-runtime-tool-validate.1.md diff --git a/vendor/github.com/opencontainers/runtime-tools/error/error.go b/vendor/github.com/opencontainers/runtime-tools/error/error.go new file mode 100644 index 000000000..f5a90800e --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/error/error.go @@ -0,0 +1,92 @@ +// Package error implements generic tooling for tracking RFC 2119 +// violations and linking back to the appropriate specification section. +package error + +import ( + "fmt" + "strings" +) + +// Level represents the RFC 2119 compliance levels +type Level int + +const ( + // MAY-level + + // May represents 'MAY' in RFC 2119. + May Level = iota + // Optional represents 'OPTIONAL' in RFC 2119. + Optional + + // SHOULD-level + + // Should represents 'SHOULD' in RFC 2119. + Should + // ShouldNot represents 'SHOULD NOT' in RFC 2119. + ShouldNot + // Recommended represents 'RECOMMENDED' in RFC 2119. + Recommended + // NotRecommended represents 'NOT RECOMMENDED' in RFC 2119. + NotRecommended + + // MUST-level + + // Must represents 'MUST' in RFC 2119 + Must + // MustNot represents 'MUST NOT' in RFC 2119. + MustNot + // Shall represents 'SHALL' in RFC 2119. + Shall + // ShallNot represents 'SHALL NOT' in RFC 2119. + ShallNot + // Required represents 'REQUIRED' in RFC 2119. + Required +) + +// Error represents an error with compliance level and specification reference. +type Error struct { + // Level represents the RFC 2119 compliance level. + Level Level + + // Reference is a URL for the violated specification requirement. + Reference string + + // Err holds additional details about the violation. + Err error +} + +// ParseLevel takes a string level and returns the RFC 2119 compliance level constant. +func ParseLevel(level string) (Level, error) { + switch strings.ToUpper(level) { + case "MAY": + fallthrough + case "OPTIONAL": + return May, nil + case "SHOULD": + fallthrough + case "SHOULDNOT": + fallthrough + case "RECOMMENDED": + fallthrough + case "NOTRECOMMENDED": + return Should, nil + case "MUST": + fallthrough + case "MUSTNOT": + fallthrough + case "SHALL": + fallthrough + case "SHALLNOT": + fallthrough + case "REQUIRED": + return Must, nil + } + + var l Level + return l, fmt.Errorf("%q is not a valid compliance level", level) +} + +// Error returns the error message with specification reference. +func (err *Error) Error() string { + return fmt.Sprintf("%s\nRefer to: %s", err.Err.Error(), err.Reference) +} diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/generate.go b/vendor/github.com/opencontainers/runtime-tools/generate/generate.go new file mode 100644 index 000000000..fce88f5e2 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/generate.go @@ -0,0 +1,1256 @@ +// Package generate implements functions generating container config files. +package generate + +import ( + "encoding/json" + "fmt" + "io" + "os" + "strings" + + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate/seccomp" + "github.com/opencontainers/runtime-tools/validate" + "github.com/syndtr/gocapability/capability" +) + +var ( + // Namespaces include the names of supported namespaces. + Namespaces = []string{"network", "pid", "mount", "ipc", "uts", "user", "cgroup"} +) + +// Generator represents a generator for a container spec. +type Generator struct { + spec *rspec.Spec + HostSpecific bool +} + +// ExportOptions have toggles for exporting only certain parts of the specification +type ExportOptions struct { + Seccomp bool // seccomp toggles if only seccomp should be exported +} + +// New creates a spec Generator with the default spec. +func New() Generator { + spec := rspec.Spec{ + Version: rspec.Version, + Root: &rspec.Root{ + Path: "", + Readonly: false, + }, + Process: &rspec.Process{ + Terminal: false, + User: rspec.User{}, + Args: []string{ + "sh", + }, + Env: []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm", + }, + Cwd: "/", + Capabilities: &rspec.LinuxCapabilities{ + Bounding: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Permitted: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Inheritable: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Effective: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Ambient: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + }, + Rlimits: []rspec.POSIXRlimit{ + { + Type: "RLIMIT_NOFILE", + Hard: uint64(1024), + Soft: uint64(1024), + }, + }, + }, + Hostname: "mrsdalloway", + Mounts: []rspec.Mount{ + { + Destination: "/proc", + Type: "proc", + Source: "proc", + Options: nil, + }, + { + Destination: "/dev", + Type: "tmpfs", + Source: "tmpfs", + Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, + }, + { + Destination: "/dev/pts", + Type: "devpts", + Source: "devpts", + Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, + }, + { + Destination: "/dev/shm", + Type: "tmpfs", + Source: "shm", + Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, + }, + { + Destination: "/dev/mqueue", + Type: "mqueue", + Source: "mqueue", + Options: []string{"nosuid", "noexec", "nodev"}, + }, + { + Destination: "/sys", + Type: "sysfs", + Source: "sysfs", + Options: []string{"nosuid", "noexec", "nodev", "ro"}, + }, + }, + Linux: &rspec.Linux{ + Resources: &rspec.LinuxResources{ + Devices: []rspec.LinuxDeviceCgroup{ + { + Allow: false, + Access: "rwm", + }, + }, + }, + Namespaces: []rspec.LinuxNamespace{ + { + Type: "pid", + }, + { + Type: "network", + }, + { + Type: "ipc", + }, + { + Type: "uts", + }, + { + Type: "mount", + }, + }, + Devices: []rspec.LinuxDevice{}, + }, + } + spec.Linux.Seccomp = seccomp.DefaultProfile(&spec) + return Generator{ + spec: &spec, + } +} + +// NewFromSpec creates a spec Generator from a given spec. +func NewFromSpec(spec *rspec.Spec) Generator { + return Generator{ + spec: spec, + } +} + +// NewFromFile loads the template specified in a file into a spec Generator. +func NewFromFile(path string) (Generator, error) { + cf, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return Generator{}, fmt.Errorf("template configuration at %s not found", path) + } + } + defer cf.Close() + + return NewFromTemplate(cf) +} + +// NewFromTemplate loads the template from io.Reader into a spec Generator. +func NewFromTemplate(r io.Reader) (Generator, error) { + var spec rspec.Spec + if err := json.NewDecoder(r).Decode(&spec); err != nil { + return Generator{}, err + } + return Generator{ + spec: &spec, + }, nil +} + +// SetSpec sets the spec in the Generator g. +func (g *Generator) SetSpec(spec *rspec.Spec) { + g.spec = spec +} + +// Spec gets the spec in the Generator g. +func (g *Generator) Spec() *rspec.Spec { + return g.spec +} + +// Save writes the spec into w. +func (g *Generator) Save(w io.Writer, exportOpts ExportOptions) (err error) { + var data []byte + + if g.spec.Linux != nil { + buf, err := json.Marshal(g.spec.Linux) + if err != nil { + return err + } + if string(buf) == "{}" { + g.spec.Linux = nil + } + } + + if exportOpts.Seccomp { + data, err = json.MarshalIndent(g.spec.Linux.Seccomp, "", "\t") + } else { + data, err = json.MarshalIndent(g.spec, "", "\t") + } + if err != nil { + return err + } + + _, err = w.Write(data) + if err != nil { + return err + } + + return nil +} + +// SaveToFile writes the spec into a file. +func (g *Generator) SaveToFile(path string, exportOpts ExportOptions) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + return g.Save(f, exportOpts) +} + +// SetVersion sets g.spec.Version. +func (g *Generator) SetVersion(version string) { + g.initSpec() + g.spec.Version = version +} + +// SetRootPath sets g.spec.Root.Path. +func (g *Generator) SetRootPath(path string) { + g.initSpecRoot() + g.spec.Root.Path = path +} + +// SetRootReadonly sets g.spec.Root.Readonly. +func (g *Generator) SetRootReadonly(b bool) { + g.initSpecRoot() + g.spec.Root.Readonly = b +} + +// SetHostname sets g.spec.Hostname. +func (g *Generator) SetHostname(s string) { + g.initSpec() + g.spec.Hostname = s +} + +// ClearAnnotations clears g.spec.Annotations. +func (g *Generator) ClearAnnotations() { + if g.spec == nil { + return + } + g.spec.Annotations = make(map[string]string) +} + +// AddAnnotation adds an annotation into g.spec.Annotations. +func (g *Generator) AddAnnotation(key, value string) { + g.initSpecAnnotations() + g.spec.Annotations[key] = value +} + +// RemoveAnnotation remove an annotation from g.spec.Annotations. +func (g *Generator) RemoveAnnotation(key string) { + if g.spec == nil || g.spec.Annotations == nil { + return + } + delete(g.spec.Annotations, key) +} + +// SetProcessConsoleSize sets g.spec.Process.ConsoleSize. +func (g *Generator) SetProcessConsoleSize(width, height uint) { + g.initSpecProcessConsoleSize() + g.spec.Process.ConsoleSize.Width = width + g.spec.Process.ConsoleSize.Height = height +} + +// SetProcessUID sets g.spec.Process.User.UID. +func (g *Generator) SetProcessUID(uid uint32) { + g.initSpecProcess() + g.spec.Process.User.UID = uid +} + +// SetProcessGID sets g.spec.Process.User.GID. +func (g *Generator) SetProcessGID(gid uint32) { + g.initSpecProcess() + g.spec.Process.User.GID = gid +} + +// SetProcessCwd sets g.spec.Process.Cwd. +func (g *Generator) SetProcessCwd(cwd string) { + g.initSpecProcess() + g.spec.Process.Cwd = cwd +} + +// SetProcessNoNewPrivileges sets g.spec.Process.NoNewPrivileges. +func (g *Generator) SetProcessNoNewPrivileges(b bool) { + g.initSpecProcess() + g.spec.Process.NoNewPrivileges = b +} + +// SetProcessTerminal sets g.spec.Process.Terminal. +func (g *Generator) SetProcessTerminal(b bool) { + g.initSpecProcess() + g.spec.Process.Terminal = b +} + +// SetProcessApparmorProfile sets g.spec.Process.ApparmorProfile. +func (g *Generator) SetProcessApparmorProfile(prof string) { + g.initSpecProcess() + g.spec.Process.ApparmorProfile = prof +} + +// SetProcessArgs sets g.spec.Process.Args. +func (g *Generator) SetProcessArgs(args []string) { + g.initSpecProcess() + g.spec.Process.Args = args +} + +// ClearProcessEnv clears g.spec.Process.Env. +func (g *Generator) ClearProcessEnv() { + if g.spec == nil { + return + } + g.spec.Process.Env = []string{} +} + +// AddProcessEnv adds name=value into g.spec.Process.Env, or replaces an +// existing entry with the given name. +func (g *Generator) AddProcessEnv(name, value string) { + g.initSpecProcess() + + env := fmt.Sprintf("%s=%s", name, value) + for idx := range g.spec.Process.Env { + if strings.HasPrefix(g.spec.Process.Env[idx], name+"=") { + g.spec.Process.Env[idx] = env + return + } + } + g.spec.Process.Env = append(g.spec.Process.Env, env) +} + +// AddProcessRlimits adds rlimit into g.spec.Process.Rlimits. +func (g *Generator) AddProcessRlimits(rType string, rHard uint64, rSoft uint64) { + g.initSpecProcess() + for i, rlimit := range g.spec.Process.Rlimits { + if rlimit.Type == rType { + g.spec.Process.Rlimits[i].Hard = rHard + g.spec.Process.Rlimits[i].Soft = rSoft + return + } + } + + newRlimit := rspec.POSIXRlimit{ + Type: rType, + Hard: rHard, + Soft: rSoft, + } + g.spec.Process.Rlimits = append(g.spec.Process.Rlimits, newRlimit) +} + +// RemoveProcessRlimits removes a rlimit from g.spec.Process.Rlimits. +func (g *Generator) RemoveProcessRlimits(rType string) error { + if g.spec == nil { + return nil + } + for i, rlimit := range g.spec.Process.Rlimits { + if rlimit.Type == rType { + g.spec.Process.Rlimits = append(g.spec.Process.Rlimits[:i], g.spec.Process.Rlimits[i+1:]...) + return nil + } + } + return nil +} + +// ClearProcessRlimits clear g.spec.Process.Rlimits. +func (g *Generator) ClearProcessRlimits() { + if g.spec == nil { + return + } + g.spec.Process.Rlimits = []rspec.POSIXRlimit{} +} + +// ClearProcessAdditionalGids clear g.spec.Process.AdditionalGids. +func (g *Generator) ClearProcessAdditionalGids() { + if g.spec == nil { + return + } + g.spec.Process.User.AdditionalGids = []uint32{} +} + +// AddProcessAdditionalGid adds an additional gid into g.spec.Process.AdditionalGids. +func (g *Generator) AddProcessAdditionalGid(gid uint32) { + g.initSpecProcess() + for _, group := range g.spec.Process.User.AdditionalGids { + if group == gid { + return + } + } + g.spec.Process.User.AdditionalGids = append(g.spec.Process.User.AdditionalGids, gid) +} + +// SetProcessSelinuxLabel sets g.spec.Process.SelinuxLabel. +func (g *Generator) SetProcessSelinuxLabel(label string) { + g.initSpecProcess() + g.spec.Process.SelinuxLabel = label +} + +// SetLinuxCgroupsPath sets g.spec.Linux.CgroupsPath. +func (g *Generator) SetLinuxCgroupsPath(path string) { + g.initSpecLinux() + g.spec.Linux.CgroupsPath = path +} + +// SetLinuxMountLabel sets g.spec.Linux.MountLabel. +func (g *Generator) SetLinuxMountLabel(label string) { + g.initSpecLinux() + g.spec.Linux.MountLabel = label +} + +// SetProcessOOMScoreAdj sets g.spec.Process.OOMScoreAdj. +func (g *Generator) SetProcessOOMScoreAdj(adj int) { + g.initSpecProcess() + g.spec.Process.OOMScoreAdj = &adj +} + +// SetLinuxResourcesCPUShares sets g.spec.Linux.Resources.CPU.Shares. +func (g *Generator) SetLinuxResourcesCPUShares(shares uint64) { + g.initSpecLinuxResourcesCPU() + g.spec.Linux.Resources.CPU.Shares = &shares +} + +// SetLinuxResourcesCPUQuota sets g.spec.Linux.Resources.CPU.Quota. +func (g *Generator) SetLinuxResourcesCPUQuota(quota int64) { + g.initSpecLinuxResourcesCPU() + g.spec.Linux.Resources.CPU.Quota = "a +} + +// SetLinuxResourcesCPUPeriod sets g.spec.Linux.Resources.CPU.Period. +func (g *Generator) SetLinuxResourcesCPUPeriod(period uint64) { + g.initSpecLinuxResourcesCPU() + g.spec.Linux.Resources.CPU.Period = &period +} + +// SetLinuxResourcesCPURealtimeRuntime sets g.spec.Linux.Resources.CPU.RealtimeRuntime. +func (g *Generator) SetLinuxResourcesCPURealtimeRuntime(time int64) { + g.initSpecLinuxResourcesCPU() + g.spec.Linux.Resources.CPU.RealtimeRuntime = &time +} + +// SetLinuxResourcesCPURealtimePeriod sets g.spec.Linux.Resources.CPU.RealtimePeriod. +func (g *Generator) SetLinuxResourcesCPURealtimePeriod(period uint64) { + g.initSpecLinuxResourcesCPU() + g.spec.Linux.Resources.CPU.RealtimePeriod = &period +} + +// SetLinuxResourcesCPUCpus sets g.spec.Linux.Resources.CPU.Cpus. +func (g *Generator) SetLinuxResourcesCPUCpus(cpus string) { + g.initSpecLinuxResourcesCPU() + g.spec.Linux.Resources.CPU.Cpus = cpus +} + +// SetLinuxResourcesCPUMems sets g.spec.Linux.Resources.CPU.Mems. +func (g *Generator) SetLinuxResourcesCPUMems(mems string) { + g.initSpecLinuxResourcesCPU() + g.spec.Linux.Resources.CPU.Mems = mems +} + +// AddLinuxResourcesHugepageLimit adds or sets g.spec.Linux.Resources.HugepageLimits. +func (g *Generator) AddLinuxResourcesHugepageLimit(pageSize string, limit uint64) { + hugepageLimit := rspec.LinuxHugepageLimit{ + Pagesize: pageSize, + Limit: limit, + } + + g.initSpecLinuxResources() + for i, pageLimit := range g.spec.Linux.Resources.HugepageLimits { + if pageLimit.Pagesize == pageSize { + g.spec.Linux.Resources.HugepageLimits[i].Limit = limit + return + } + } + g.spec.Linux.Resources.HugepageLimits = append(g.spec.Linux.Resources.HugepageLimits, hugepageLimit) +} + +// DropLinuxResourcesHugepageLimit drops a hugepage limit from g.spec.Linux.Resources.HugepageLimits. +func (g *Generator) DropLinuxResourcesHugepageLimit(pageSize string) error { + g.initSpecLinuxResources() + for i, pageLimit := range g.spec.Linux.Resources.HugepageLimits { + if pageLimit.Pagesize == pageSize { + g.spec.Linux.Resources.HugepageLimits = append(g.spec.Linux.Resources.HugepageLimits[:i], g.spec.Linux.Resources.HugepageLimits[i+1:]...) + return nil + } + } + + return nil +} + +// SetLinuxResourcesMemoryLimit sets g.spec.Linux.Resources.Memory.Limit. +func (g *Generator) SetLinuxResourcesMemoryLimit(limit int64) { + g.initSpecLinuxResourcesMemory() + g.spec.Linux.Resources.Memory.Limit = &limit +} + +// SetLinuxResourcesMemoryReservation sets g.spec.Linux.Resources.Memory.Reservation. +func (g *Generator) SetLinuxResourcesMemoryReservation(reservation int64) { + g.initSpecLinuxResourcesMemory() + g.spec.Linux.Resources.Memory.Reservation = &reservation +} + +// SetLinuxResourcesMemorySwap sets g.spec.Linux.Resources.Memory.Swap. +func (g *Generator) SetLinuxResourcesMemorySwap(swap int64) { + g.initSpecLinuxResourcesMemory() + g.spec.Linux.Resources.Memory.Swap = &swap +} + +// SetLinuxResourcesMemoryKernel sets g.spec.Linux.Resources.Memory.Kernel. +func (g *Generator) SetLinuxResourcesMemoryKernel(kernel int64) { + g.initSpecLinuxResourcesMemory() + g.spec.Linux.Resources.Memory.Kernel = &kernel +} + +// SetLinuxResourcesMemoryKernelTCP sets g.spec.Linux.Resources.Memory.KernelTCP. +func (g *Generator) SetLinuxResourcesMemoryKernelTCP(kernelTCP int64) { + g.initSpecLinuxResourcesMemory() + g.spec.Linux.Resources.Memory.KernelTCP = &kernelTCP +} + +// SetLinuxResourcesMemorySwappiness sets g.spec.Linux.Resources.Memory.Swappiness. +func (g *Generator) SetLinuxResourcesMemorySwappiness(swappiness uint64) { + g.initSpecLinuxResourcesMemory() + g.spec.Linux.Resources.Memory.Swappiness = &swappiness +} + +// SetLinuxResourcesMemoryDisableOOMKiller sets g.spec.Linux.Resources.Memory.DisableOOMKiller. +func (g *Generator) SetLinuxResourcesMemoryDisableOOMKiller(disable bool) { + g.initSpecLinuxResourcesMemory() + g.spec.Linux.Resources.Memory.DisableOOMKiller = &disable +} + +// SetLinuxResourcesNetworkClassID sets g.spec.Linux.Resources.Network.ClassID. +func (g *Generator) SetLinuxResourcesNetworkClassID(classid uint32) { + g.initSpecLinuxResourcesNetwork() + g.spec.Linux.Resources.Network.ClassID = &classid +} + +// AddLinuxResourcesNetworkPriorities adds or sets g.spec.Linux.Resources.Network.Priorities. +func (g *Generator) AddLinuxResourcesNetworkPriorities(name string, prio uint32) { + g.initSpecLinuxResourcesNetwork() + for i, netPriority := range g.spec.Linux.Resources.Network.Priorities { + if netPriority.Name == name { + g.spec.Linux.Resources.Network.Priorities[i].Priority = prio + return + } + } + interfacePrio := new(rspec.LinuxInterfacePriority) + interfacePrio.Name = name + interfacePrio.Priority = prio + g.spec.Linux.Resources.Network.Priorities = append(g.spec.Linux.Resources.Network.Priorities, *interfacePrio) +} + +// DropLinuxResourcesNetworkPriorities drops one item from g.spec.Linux.Resources.Network.Priorities. +func (g *Generator) DropLinuxResourcesNetworkPriorities(name string) { + g.initSpecLinuxResourcesNetwork() + for i, netPriority := range g.spec.Linux.Resources.Network.Priorities { + if netPriority.Name == name { + g.spec.Linux.Resources.Network.Priorities = append(g.spec.Linux.Resources.Network.Priorities[:i], g.spec.Linux.Resources.Network.Priorities[i+1:]...) + return + } + } +} + +// SetLinuxResourcesPidsLimit sets g.spec.Linux.Resources.Pids.Limit. +func (g *Generator) SetLinuxResourcesPidsLimit(limit int64) { + g.initSpecLinuxResourcesPids() + g.spec.Linux.Resources.Pids.Limit = limit +} + +// ClearLinuxSysctl clears g.spec.Linux.Sysctl. +func (g *Generator) ClearLinuxSysctl() { + if g.spec == nil || g.spec.Linux == nil { + return + } + g.spec.Linux.Sysctl = make(map[string]string) +} + +// AddLinuxSysctl adds a new sysctl config into g.spec.Linux.Sysctl. +func (g *Generator) AddLinuxSysctl(key, value string) { + g.initSpecLinuxSysctl() + g.spec.Linux.Sysctl[key] = value +} + +// RemoveLinuxSysctl removes a sysctl config from g.spec.Linux.Sysctl. +func (g *Generator) RemoveLinuxSysctl(key string) { + if g.spec == nil || g.spec.Linux == nil || g.spec.Linux.Sysctl == nil { + return + } + delete(g.spec.Linux.Sysctl, key) +} + +// ClearLinuxUIDMappings clear g.spec.Linux.UIDMappings. +func (g *Generator) ClearLinuxUIDMappings() { + if g.spec == nil || g.spec.Linux == nil { + return + } + g.spec.Linux.UIDMappings = []rspec.LinuxIDMapping{} +} + +// AddLinuxUIDMapping adds uidMap into g.spec.Linux.UIDMappings. +func (g *Generator) AddLinuxUIDMapping(hid, cid, size uint32) { + idMapping := rspec.LinuxIDMapping{ + HostID: hid, + ContainerID: cid, + Size: size, + } + + g.initSpecLinux() + g.spec.Linux.UIDMappings = append(g.spec.Linux.UIDMappings, idMapping) +} + +// ClearLinuxGIDMappings clear g.spec.Linux.GIDMappings. +func (g *Generator) ClearLinuxGIDMappings() { + if g.spec == nil || g.spec.Linux == nil { + return + } + g.spec.Linux.GIDMappings = []rspec.LinuxIDMapping{} +} + +// AddLinuxGIDMapping adds gidMap into g.spec.Linux.GIDMappings. +func (g *Generator) AddLinuxGIDMapping(hid, cid, size uint32) { + idMapping := rspec.LinuxIDMapping{ + HostID: hid, + ContainerID: cid, + Size: size, + } + + g.initSpecLinux() + g.spec.Linux.GIDMappings = append(g.spec.Linux.GIDMappings, idMapping) +} + +// SetLinuxRootPropagation sets g.spec.Linux.RootfsPropagation. +func (g *Generator) SetLinuxRootPropagation(rp string) error { + switch rp { + case "": + case "private": + case "rprivate": + case "slave": + case "rslave": + case "shared": + case "rshared": + default: + return fmt.Errorf("rootfs-propagation must be empty or one of private|rprivate|slave|rslave|shared|rshared") + } + g.initSpecLinux() + g.spec.Linux.RootfsPropagation = rp + return nil +} + +// ClearPreStartHooks clear g.spec.Hooks.Prestart. +func (g *Generator) ClearPreStartHooks() { + if g.spec == nil { + return + } + if g.spec.Hooks == nil { + return + } + g.spec.Hooks.Prestart = []rspec.Hook{} +} + +// AddPreStartHook add a prestart hook into g.spec.Hooks.Prestart. +func (g *Generator) AddPreStartHook(path string, args []string) { + g.initSpecHooks() + hook := rspec.Hook{Path: path, Args: args} + for i, hook := range g.spec.Hooks.Prestart { + if hook.Path == path { + g.spec.Hooks.Prestart[i] = hook + return + } + } + g.spec.Hooks.Prestart = append(g.spec.Hooks.Prestart, hook) +} + +// AddPreStartHookEnv adds envs of a prestart hook into g.spec.Hooks.Prestart. +func (g *Generator) AddPreStartHookEnv(path string, envs []string) { + g.initSpecHooks() + for i, hook := range g.spec.Hooks.Prestart { + if hook.Path == path { + g.spec.Hooks.Prestart[i].Env = envs + return + } + } + hook := rspec.Hook{Path: path, Env: envs} + g.spec.Hooks.Prestart = append(g.spec.Hooks.Prestart, hook) +} + +// AddPreStartHookTimeout adds timeout of a prestart hook into g.spec.Hooks.Prestart. +func (g *Generator) AddPreStartHookTimeout(path string, timeout int) { + g.initSpecHooks() + for i, hook := range g.spec.Hooks.Prestart { + if hook.Path == path { + g.spec.Hooks.Prestart[i].Timeout = &timeout + return + } + } + hook := rspec.Hook{Path: path, Timeout: &timeout} + g.spec.Hooks.Prestart = append(g.spec.Hooks.Prestart, hook) +} + +// ClearPostStopHooks clear g.spec.Hooks.Poststop. +func (g *Generator) ClearPostStopHooks() { + if g.spec == nil { + return + } + if g.spec.Hooks == nil { + return + } + g.spec.Hooks.Poststop = []rspec.Hook{} +} + +// AddPostStopHook adds a poststop hook into g.spec.Hooks.Poststop. +func (g *Generator) AddPostStopHook(path string, args []string) { + g.initSpecHooks() + hook := rspec.Hook{Path: path, Args: args} + for i, hook := range g.spec.Hooks.Poststop { + if hook.Path == path { + g.spec.Hooks.Poststop[i] = hook + return + } + } + g.spec.Hooks.Poststop = append(g.spec.Hooks.Poststop, hook) +} + +// AddPostStopHookEnv adds envs of a poststop hook into g.spec.Hooks.Poststop. +func (g *Generator) AddPostStopHookEnv(path string, envs []string) { + g.initSpecHooks() + for i, hook := range g.spec.Hooks.Poststop { + if hook.Path == path { + g.spec.Hooks.Poststop[i].Env = envs + return + } + } + hook := rspec.Hook{Path: path, Env: envs} + g.spec.Hooks.Poststop = append(g.spec.Hooks.Poststop, hook) +} + +// AddPostStopHookTimeout adds timeout of a poststop hook into g.spec.Hooks.Poststop. +func (g *Generator) AddPostStopHookTimeout(path string, timeout int) { + g.initSpecHooks() + for i, hook := range g.spec.Hooks.Poststop { + if hook.Path == path { + g.spec.Hooks.Poststop[i].Timeout = &timeout + return + } + } + hook := rspec.Hook{Path: path, Timeout: &timeout} + g.spec.Hooks.Poststop = append(g.spec.Hooks.Poststop, hook) +} + +// ClearPostStartHooks clear g.spec.Hooks.Poststart. +func (g *Generator) ClearPostStartHooks() { + if g.spec == nil { + return + } + if g.spec.Hooks == nil { + return + } + g.spec.Hooks.Poststart = []rspec.Hook{} +} + +// AddPostStartHook adds a poststart hook into g.spec.Hooks.Poststart. +func (g *Generator) AddPostStartHook(path string, args []string) { + g.initSpecHooks() + hook := rspec.Hook{Path: path, Args: args} + for i, hook := range g.spec.Hooks.Poststart { + if hook.Path == path { + g.spec.Hooks.Poststart[i] = hook + return + } + } + g.spec.Hooks.Poststart = append(g.spec.Hooks.Poststart, hook) +} + +// AddPostStartHookEnv adds envs of a poststart hook into g.spec.Hooks.Poststart. +func (g *Generator) AddPostStartHookEnv(path string, envs []string) { + g.initSpecHooks() + for i, hook := range g.spec.Hooks.Poststart { + if hook.Path == path { + g.spec.Hooks.Poststart[i].Env = envs + return + } + } + hook := rspec.Hook{Path: path, Env: envs} + g.spec.Hooks.Poststart = append(g.spec.Hooks.Poststart, hook) +} + +// AddPostStartHookTimeout adds timeout of a poststart hook into g.spec.Hooks.Poststart. +func (g *Generator) AddPostStartHookTimeout(path string, timeout int) { + g.initSpecHooks() + for i, hook := range g.spec.Hooks.Poststart { + if hook.Path == path { + g.spec.Hooks.Poststart[i].Timeout = &timeout + return + } + } + hook := rspec.Hook{Path: path, Timeout: &timeout} + g.spec.Hooks.Poststart = append(g.spec.Hooks.Poststart, hook) +} + +// AddTmpfsMount adds a tmpfs mount into g.spec.Mounts. +func (g *Generator) AddTmpfsMount(dest string, options []string) { + mnt := rspec.Mount{ + Destination: dest, + Type: "tmpfs", + Source: "tmpfs", + Options: options, + } + + g.initSpec() + g.spec.Mounts = append(g.spec.Mounts, mnt) +} + +// AddCgroupsMount adds a cgroup mount into g.spec.Mounts. +func (g *Generator) AddCgroupsMount(mountCgroupOption string) error { + switch mountCgroupOption { + case "ro": + case "rw": + case "no": + return nil + default: + return fmt.Errorf("--mount-cgroups should be one of (ro,rw,no)") + } + + mnt := rspec.Mount{ + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Source: "cgroup", + Options: []string{"nosuid", "noexec", "nodev", "relatime", mountCgroupOption}, + } + g.initSpec() + g.spec.Mounts = append(g.spec.Mounts, mnt) + + return nil +} + +// AddBindMount adds a bind mount into g.spec.Mounts. +func (g *Generator) AddBindMount(source, dest string, options []string) { + if len(options) == 0 { + options = []string{"rw"} + } + + // We have to make sure that there is a bind option set, otherwise it won't + // be an actual bindmount. + foundBindOption := false + for _, opt := range options { + if opt == "bind" || opt == "rbind" { + foundBindOption = true + break + } + } + if !foundBindOption { + options = append(options, "bind") + } + + mnt := rspec.Mount{ + Destination: dest, + Type: "bind", + Source: source, + Options: options, + } + g.initSpec() + g.spec.Mounts = append(g.spec.Mounts, mnt) +} + +// SetupPrivileged sets up the privilege-related fields inside g.spec. +func (g *Generator) SetupPrivileged(privileged bool) { + if privileged { // Add all capabilities in privileged mode. + var finalCapList []string + for _, cap := range capability.List() { + if g.HostSpecific && cap > validate.LastCap() { + continue + } + finalCapList = append(finalCapList, fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))) + } + g.initSpecLinux() + g.initSpecProcessCapabilities() + g.ClearProcessCapabilities() + g.spec.Process.Capabilities.Bounding = append(g.spec.Process.Capabilities.Bounding, finalCapList...) + g.spec.Process.Capabilities.Effective = append(g.spec.Process.Capabilities.Effective, finalCapList...) + g.spec.Process.Capabilities.Inheritable = append(g.spec.Process.Capabilities.Inheritable, finalCapList...) + g.spec.Process.Capabilities.Permitted = append(g.spec.Process.Capabilities.Permitted, finalCapList...) + g.spec.Process.Capabilities.Ambient = append(g.spec.Process.Capabilities.Ambient, finalCapList...) + g.spec.Process.SelinuxLabel = "" + g.spec.Process.ApparmorProfile = "" + g.spec.Linux.Seccomp = nil + } +} + +// ClearProcessCapabilities clear g.spec.Process.Capabilities. +func (g *Generator) ClearProcessCapabilities() { + if g.spec == nil { + return + } + g.spec.Process.Capabilities.Bounding = []string{} + g.spec.Process.Capabilities.Effective = []string{} + g.spec.Process.Capabilities.Inheritable = []string{} + g.spec.Process.Capabilities.Permitted = []string{} + g.spec.Process.Capabilities.Ambient = []string{} +} + +// AddProcessCapability adds a process capability into g.spec.Process.Capabilities. +func (g *Generator) AddProcessCapability(c string) error { + cp := strings.ToUpper(c) + if err := validate.CapValid(cp, g.HostSpecific); err != nil { + return err + } + + g.initSpecProcessCapabilities() + + var foundBounding bool + for _, cap := range g.spec.Process.Capabilities.Bounding { + if strings.ToUpper(cap) == cp { + foundBounding = true + break + } + } + if !foundBounding { + g.spec.Process.Capabilities.Bounding = append(g.spec.Process.Capabilities.Bounding, cp) + } + + var foundEffective bool + for _, cap := range g.spec.Process.Capabilities.Effective { + if strings.ToUpper(cap) == cp { + foundEffective = true + break + } + } + if !foundEffective { + g.spec.Process.Capabilities.Effective = append(g.spec.Process.Capabilities.Effective, cp) + } + + var foundInheritable bool + for _, cap := range g.spec.Process.Capabilities.Inheritable { + if strings.ToUpper(cap) == cp { + foundInheritable = true + break + } + } + if !foundInheritable { + g.spec.Process.Capabilities.Inheritable = append(g.spec.Process.Capabilities.Inheritable, cp) + } + + var foundPermitted bool + for _, cap := range g.spec.Process.Capabilities.Permitted { + if strings.ToUpper(cap) == cp { + foundPermitted = true + break + } + } + if !foundPermitted { + g.spec.Process.Capabilities.Permitted = append(g.spec.Process.Capabilities.Permitted, cp) + } + + var foundAmbient bool + for _, cap := range g.spec.Process.Capabilities.Ambient { + if strings.ToUpper(cap) == cp { + foundAmbient = true + break + } + } + if !foundAmbient { + g.spec.Process.Capabilities.Ambient = append(g.spec.Process.Capabilities.Ambient, cp) + } + + return nil +} + +// DropProcessCapability drops a process capability from g.spec.Process.Capabilities. +func (g *Generator) DropProcessCapability(c string) error { + cp := strings.ToUpper(c) + if err := validate.CapValid(cp, g.HostSpecific); err != nil { + return err + } + + g.initSpecProcessCapabilities() + + // we don't care about order...and this is way faster... + removeFunc := func(s []string, i int) []string { + s[i] = s[len(s)-1] + return s[:len(s)-1] + } + + for i, cap := range g.spec.Process.Capabilities.Bounding { + if strings.ToUpper(cap) == cp { + g.spec.Process.Capabilities.Bounding = removeFunc(g.spec.Process.Capabilities.Bounding, i) + } + } + + for i, cap := range g.spec.Process.Capabilities.Effective { + if strings.ToUpper(cap) == cp { + g.spec.Process.Capabilities.Effective = removeFunc(g.spec.Process.Capabilities.Effective, i) + } + } + + for i, cap := range g.spec.Process.Capabilities.Inheritable { + if strings.ToUpper(cap) == cp { + g.spec.Process.Capabilities.Inheritable = removeFunc(g.spec.Process.Capabilities.Inheritable, i) + } + } + + for i, cap := range g.spec.Process.Capabilities.Permitted { + if strings.ToUpper(cap) == cp { + g.spec.Process.Capabilities.Permitted = removeFunc(g.spec.Process.Capabilities.Permitted, i) + } + } + + for i, cap := range g.spec.Process.Capabilities.Ambient { + if strings.ToUpper(cap) == cp { + g.spec.Process.Capabilities.Ambient = removeFunc(g.spec.Process.Capabilities.Ambient, i) + } + } + + return nil +} + +func mapStrToNamespace(ns string, path string) (rspec.LinuxNamespace, error) { + switch ns { + case "network": + return rspec.LinuxNamespace{Type: rspec.NetworkNamespace, Path: path}, nil + case "pid": + return rspec.LinuxNamespace{Type: rspec.PIDNamespace, Path: path}, nil + case "mount": + return rspec.LinuxNamespace{Type: rspec.MountNamespace, Path: path}, nil + case "ipc": + return rspec.LinuxNamespace{Type: rspec.IPCNamespace, Path: path}, nil + case "uts": + return rspec.LinuxNamespace{Type: rspec.UTSNamespace, Path: path}, nil + case "user": + return rspec.LinuxNamespace{Type: rspec.UserNamespace, Path: path}, nil + case "cgroup": + return rspec.LinuxNamespace{Type: rspec.CgroupNamespace, Path: path}, nil + default: + return rspec.LinuxNamespace{}, fmt.Errorf("unrecognized namespace %q", ns) + } +} + +// ClearLinuxNamespaces clear g.spec.Linux.Namespaces. +func (g *Generator) ClearLinuxNamespaces() { + if g.spec == nil || g.spec.Linux == nil { + return + } + g.spec.Linux.Namespaces = []rspec.LinuxNamespace{} +} + +// AddOrReplaceLinuxNamespace adds or replaces a namespace inside +// g.spec.Linux.Namespaces. +func (g *Generator) AddOrReplaceLinuxNamespace(ns string, path string) error { + namespace, err := mapStrToNamespace(ns, path) + if err != nil { + return err + } + + g.initSpecLinux() + for i, ns := range g.spec.Linux.Namespaces { + if ns.Type == namespace.Type { + g.spec.Linux.Namespaces[i] = namespace + return nil + } + } + g.spec.Linux.Namespaces = append(g.spec.Linux.Namespaces, namespace) + return nil +} + +// RemoveLinuxNamespace removes a namespace from g.spec.Linux.Namespaces. +func (g *Generator) RemoveLinuxNamespace(ns string) error { + namespace, err := mapStrToNamespace(ns, "") + if err != nil { + return err + } + + if g.spec == nil || g.spec.Linux == nil { + return nil + } + for i, ns := range g.spec.Linux.Namespaces { + if ns.Type == namespace.Type { + g.spec.Linux.Namespaces = append(g.spec.Linux.Namespaces[:i], g.spec.Linux.Namespaces[i+1:]...) + return nil + } + } + return nil +} + +// AddDevice - add a device into g.spec.Linux.Devices +func (g *Generator) AddDevice(device rspec.LinuxDevice) { + g.initSpecLinux() + + for i, dev := range g.spec.Linux.Devices { + if dev.Path == device.Path { + g.spec.Linux.Devices[i] = device + return + } + if dev.Type == device.Type && dev.Major == device.Major && dev.Minor == device.Minor { + fmt.Fprintln(os.Stderr, "WARNING: The same type, major and minor should not be used for multiple devices.") + } + } + + g.spec.Linux.Devices = append(g.spec.Linux.Devices, device) +} + +// RemoveDevice remove a device from g.spec.Linux.Devices +func (g *Generator) RemoveDevice(path string) error { + if g.spec == nil || g.spec.Linux == nil || g.spec.Linux.Devices == nil { + return nil + } + + for i, device := range g.spec.Linux.Devices { + if device.Path == path { + g.spec.Linux.Devices = append(g.spec.Linux.Devices[:i], g.spec.Linux.Devices[i+1:]...) + return nil + } + } + return nil +} + +// ClearLinuxDevices clears g.spec.Linux.Devices +func (g *Generator) ClearLinuxDevices() { + if g.spec == nil || g.spec.Linux == nil || g.spec.Linux.Devices == nil { + return + } + + g.spec.Linux.Devices = []rspec.LinuxDevice{} +} + +// strPtr returns the pointer pointing to the string s. +func strPtr(s string) *string { return &s } + +// SetSyscallAction adds rules for syscalls with the specified action +func (g *Generator) SetSyscallAction(arguments seccomp.SyscallOpts) error { + g.initSpecLinuxSeccomp() + return seccomp.ParseSyscallFlag(arguments, g.spec.Linux.Seccomp) +} + +// SetDefaultSeccompAction sets the default action for all syscalls not defined +// and then removes any syscall rules with this action already specified. +func (g *Generator) SetDefaultSeccompAction(action string) error { + g.initSpecLinuxSeccomp() + return seccomp.ParseDefaultAction(action, g.spec.Linux.Seccomp) +} + +// SetDefaultSeccompActionForce only sets the default action for all syscalls not defined +func (g *Generator) SetDefaultSeccompActionForce(action string) error { + g.initSpecLinuxSeccomp() + return seccomp.ParseDefaultActionForce(action, g.spec.Linux.Seccomp) +} + +// SetSeccompArchitecture sets the supported seccomp architectures +func (g *Generator) SetSeccompArchitecture(architecture string) error { + g.initSpecLinuxSeccomp() + return seccomp.ParseArchitectureFlag(architecture, g.spec.Linux.Seccomp) +} + +// RemoveSeccompRule removes rules for any specified syscalls +func (g *Generator) RemoveSeccompRule(arguments string) error { + g.initSpecLinuxSeccomp() + return seccomp.RemoveAction(arguments, g.spec.Linux.Seccomp) +} + +// RemoveAllSeccompRules removes all syscall rules +func (g *Generator) RemoveAllSeccompRules() error { + g.initSpecLinuxSeccomp() + return seccomp.RemoveAllSeccompRules(g.spec.Linux.Seccomp) +} + +// AddLinuxMaskedPaths adds masked paths into g.spec.Linux.MaskedPaths. +func (g *Generator) AddLinuxMaskedPaths(path string) { + g.initSpecLinux() + g.spec.Linux.MaskedPaths = append(g.spec.Linux.MaskedPaths, path) +} + +// AddLinuxReadonlyPaths adds readonly paths into g.spec.Linux.MaskedPaths. +func (g *Generator) AddLinuxReadonlyPaths(path string) { + g.initSpecLinux() + g.spec.Linux.ReadonlyPaths = append(g.spec.Linux.ReadonlyPaths, path) +} diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/consts.go b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/consts.go new file mode 100644 index 000000000..eade5718e --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/consts.go @@ -0,0 +1,12 @@ +package seccomp + +const ( + seccompOverwrite = "overwrite" + seccompAppend = "append" + nothing = "nothing" + kill = "kill" + trap = "trap" + trace = "trace" + allow = "allow" + errno = "errno" +) diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_action.go b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_action.go new file mode 100644 index 000000000..25daf0752 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_action.go @@ -0,0 +1,135 @@ +package seccomp + +import ( + "fmt" + "strconv" + "strings" + + rspec "github.com/opencontainers/runtime-spec/specs-go" +) + +// SyscallOpts contain options for parsing syscall rules +type SyscallOpts struct { + Action string + Syscall string + Index string + Value string + ValueTwo string + Operator string +} + +// ParseSyscallFlag takes a SyscallOpts struct and the seccomp configuration +// and sets the new syscall rule accordingly +func ParseSyscallFlag(args SyscallOpts, config *rspec.LinuxSeccomp) error { + var arguments []string + if args.Index != "" && args.Value != "" && args.ValueTwo != "" && args.Operator != "" { + arguments = []string{args.Action, args.Syscall, args.Index, args.Value, + args.ValueTwo, args.Operator} + } else { + arguments = []string{args.Action, args.Syscall} + } + + action, _ := parseAction(arguments[0]) + if action == config.DefaultAction && args.argsAreEmpty() { + // default already set, no need to make changes + return nil + } + + var newSyscall rspec.LinuxSyscall + numOfArgs := len(arguments) + if numOfArgs == 6 || numOfArgs == 2 { + argStruct, err := parseArguments(arguments[1:]) + if err != nil { + return err + } + newSyscall = newSyscallStruct(arguments[1], action, argStruct) + } else { + return fmt.Errorf("incorrect number of arguments to ParseSyscall: %d", numOfArgs) + } + + descison, err := decideCourseOfAction(&newSyscall, config.Syscalls) + if err != nil { + return err + } + delimDescison := strings.Split(descison, ":") + + if delimDescison[0] == seccompAppend { + config.Syscalls = append(config.Syscalls, newSyscall) + } + + if delimDescison[0] == seccompOverwrite { + indexForOverwrite, err := strconv.ParseInt(delimDescison[1], 10, 32) + if err != nil { + return err + } + config.Syscalls[indexForOverwrite] = newSyscall + } + + return nil +} + +var actions = map[string]rspec.LinuxSeccompAction{ + "allow": rspec.ActAllow, + "errno": rspec.ActErrno, + "kill": rspec.ActKill, + "trace": rspec.ActTrace, + "trap": rspec.ActTrap, +} + +// Take passed action, return the SCMP_ACT_<ACTION> version of it +func parseAction(action string) (rspec.LinuxSeccompAction, error) { + a, ok := actions[action] + if !ok { + return "", fmt.Errorf("unrecognized action: %s", action) + } + return a, nil +} + +// ParseDefaultAction sets the default action of the seccomp configuration +// and then removes any rules that were already specified with this action +func ParseDefaultAction(action string, config *rspec.LinuxSeccomp) error { + if action == "" { + return nil + } + + defaultAction, err := parseAction(action) + if err != nil { + return err + } + config.DefaultAction = defaultAction + err = RemoveAllMatchingRules(config, defaultAction) + if err != nil { + return err + } + return nil +} + +// ParseDefaultActionForce simply sets the default action of the seccomp configuration +func ParseDefaultActionForce(action string, config *rspec.LinuxSeccomp) error { + if action == "" { + return nil + } + + defaultAction, err := parseAction(action) + if err != nil { + return err + } + config.DefaultAction = defaultAction + return nil +} + +func newSyscallStruct(name string, action rspec.LinuxSeccompAction, args []rspec.LinuxSeccompArg) rspec.LinuxSyscall { + syscallStruct := rspec.LinuxSyscall{ + Names: []string{name}, + Action: action, + Args: args, + } + return syscallStruct +} + +func (s SyscallOpts) argsAreEmpty() bool { + return (s.Index == "" && + s.Value == "" && + s.ValueTwo == "" && + s.Operator == "") +} diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_architecture.go b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_architecture.go new file mode 100644 index 000000000..9b2bdfd2f --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_architecture.go @@ -0,0 +1,55 @@ +package seccomp + +import ( + "fmt" + + rspec "github.com/opencontainers/runtime-spec/specs-go" +) + +// ParseArchitectureFlag takes the raw string passed with the --arch flag, parses it +// and updates the Seccomp config accordingly +func ParseArchitectureFlag(architectureArg string, config *rspec.LinuxSeccomp) error { + correctedArch, err := parseArch(architectureArg) + if err != nil { + return err + } + + shouldAppend := true + for _, alreadySpecified := range config.Architectures { + if correctedArch == alreadySpecified { + shouldAppend = false + } + } + if shouldAppend { + config.Architectures = append(config.Architectures, correctedArch) + } + return nil +} + +func parseArch(arch string) (rspec.Arch, error) { + arches := map[string]rspec.Arch{ + "x86": rspec.ArchX86, + "amd64": rspec.ArchX86_64, + "x32": rspec.ArchX32, + "arm": rspec.ArchARM, + "arm64": rspec.ArchAARCH64, + "mips": rspec.ArchMIPS, + "mips64": rspec.ArchMIPS64, + "mips64n32": rspec.ArchMIPS64N32, + "mipsel": rspec.ArchMIPSEL, + "mipsel64": rspec.ArchMIPSEL64, + "mipsel64n32": rspec.ArchMIPSEL64N32, + "parisc": rspec.ArchPARISC, + "parisc64": rspec.ArchPARISC64, + "ppc": rspec.ArchPPC, + "ppc64": rspec.ArchPPC64, + "ppc64le": rspec.ArchPPC64LE, + "s390": rspec.ArchS390, + "s390x": rspec.ArchS390X, + } + a, ok := arches[arch] + if !ok { + return "", fmt.Errorf("unrecognized architecture: %s", arch) + } + return a, nil +} diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_arguments.go b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_arguments.go new file mode 100644 index 000000000..2b4c394e6 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_arguments.go @@ -0,0 +1,73 @@ +package seccomp + +import ( + "fmt" + "strconv" + + rspec "github.com/opencontainers/runtime-spec/specs-go" +) + +// parseArguments takes a list of arguments (delimArgs). It parses and fills out +// the argument information and returns a slice of arg structs +func parseArguments(delimArgs []string) ([]rspec.LinuxSeccompArg, error) { + nilArgSlice := []rspec.LinuxSeccompArg{} + numberOfArgs := len(delimArgs) + + // No parameters passed with syscall + if numberOfArgs == 1 { + return nilArgSlice, nil + } + + // Correct number of parameters passed with syscall + if numberOfArgs == 5 { + syscallIndex, err := strconv.ParseUint(delimArgs[1], 10, 0) + if err != nil { + return nilArgSlice, err + } + + syscallValue, err := strconv.ParseUint(delimArgs[2], 10, 64) + if err != nil { + return nilArgSlice, err + } + + syscallValueTwo, err := strconv.ParseUint(delimArgs[3], 10, 64) + if err != nil { + return nilArgSlice, err + } + + syscallOp, err := parseOperator(delimArgs[4]) + if err != nil { + return nilArgSlice, err + } + + argStruct := rspec.LinuxSeccompArg{ + Index: uint(syscallIndex), + Value: syscallValue, + ValueTwo: syscallValueTwo, + Op: syscallOp, + } + + argSlice := []rspec.LinuxSeccompArg{} + argSlice = append(argSlice, argStruct) + return argSlice, nil + } + + return nilArgSlice, fmt.Errorf("incorrect number of arguments passed with syscall: %d", numberOfArgs) +} + +func parseOperator(operator string) (rspec.LinuxSeccompOperator, error) { + operators := map[string]rspec.LinuxSeccompOperator{ + "NE": rspec.OpNotEqual, + "LT": rspec.OpLessThan, + "LE": rspec.OpLessEqual, + "EQ": rspec.OpEqualTo, + "GE": rspec.OpGreaterEqual, + "GT": rspec.OpGreaterThan, + "ME": rspec.OpMaskedEqual, + } + o, ok := operators[operator] + if !ok { + return "", fmt.Errorf("unrecognized operator: %s", operator) + } + return o, nil +} diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_remove.go b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_remove.go new file mode 100644 index 000000000..59537d49c --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/parse_remove.go @@ -0,0 +1,52 @@ +package seccomp + +import ( + "fmt" + "reflect" + "strings" + + rspec "github.com/opencontainers/runtime-spec/specs-go" +) + +// RemoveAction takes the argument string that was passed with the --remove flag, +// parses it, and updates the Seccomp config accordingly +func RemoveAction(arguments string, config *rspec.LinuxSeccomp) error { + if config == nil { + return fmt.Errorf("Cannot remove action from nil Seccomp pointer") + } + + syscallsToRemove := strings.Split(arguments, ",") + + for counter, syscallStruct := range config.Syscalls { + if reflect.DeepEqual(syscallsToRemove, syscallStruct.Names) { + config.Syscalls = append(config.Syscalls[:counter], config.Syscalls[counter+1:]...) + } + } + + return nil +} + +// RemoveAllSeccompRules removes all seccomp syscall rules +func RemoveAllSeccompRules(config *rspec.LinuxSeccomp) error { + if config == nil { + return fmt.Errorf("Cannot remove action from nil Seccomp pointer") + } + newSyscallSlice := []rspec.LinuxSyscall{} + config.Syscalls = newSyscallSlice + return nil +} + +// RemoveAllMatchingRules will remove any syscall rules that match the specified action +func RemoveAllMatchingRules(config *rspec.LinuxSeccomp, seccompAction rspec.LinuxSeccompAction) error { + if config == nil { + return fmt.Errorf("Cannot remove action from nil Seccomp pointer") + } + + for _, syscall := range config.Syscalls { + if reflect.DeepEqual(syscall.Action, seccompAction) { + RemoveAction(strings.Join(syscall.Names, ","), config) + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/seccomp_default.go b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/seccomp_default.go new file mode 100644 index 000000000..35b12cd65 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/seccomp_default.go @@ -0,0 +1,577 @@ +package seccomp + +import ( + "runtime" + "syscall" + + "github.com/opencontainers/runtime-spec/specs-go" + rspec "github.com/opencontainers/runtime-spec/specs-go" +) + +func arches() []rspec.Arch { + native := runtime.GOARCH + + switch native { + case "amd64": + return []rspec.Arch{rspec.ArchX86_64, rspec.ArchX86, rspec.ArchX32} + case "arm64": + return []rspec.Arch{rspec.ArchARM, rspec.ArchAARCH64} + case "mips64": + return []rspec.Arch{rspec.ArchMIPS, rspec.ArchMIPS64, rspec.ArchMIPS64N32} + case "mips64n32": + return []rspec.Arch{rspec.ArchMIPS, rspec.ArchMIPS64, rspec.ArchMIPS64N32} + case "mipsel64": + return []rspec.Arch{rspec.ArchMIPSEL, rspec.ArchMIPSEL64, rspec.ArchMIPSEL64N32} + case "mipsel64n32": + return []rspec.Arch{rspec.ArchMIPSEL, rspec.ArchMIPSEL64, rspec.ArchMIPSEL64N32} + case "s390x": + return []rspec.Arch{rspec.ArchS390, rspec.ArchS390X} + default: + return []rspec.Arch{} + } +} + +// DefaultProfile defines the whitelist for the default seccomp profile. +func DefaultProfile(rs *specs.Spec) *rspec.LinuxSeccomp { + + syscalls := []rspec.LinuxSyscall{ + { + Names: []string{ + "accept", + "accept4", + "access", + "alarm", + "bind", + "brk", + "capget", + "capset", + "chdir", + "chmod", + "chown", + "chown32", + "clock_getres", + "clock_gettime", + "clock_nanosleep", + "close", + "connect", + "copy_file_range", + "creat", + "dup", + "dup2", + "dup3", + "epoll_create", + "epoll_create1", + "epoll_ctl", + "epoll_ctl_old", + "epoll_pwait", + "epoll_wait", + "epoll_wait_old", + "eventfd", + "eventfd2", + "execve", + "execveat", + "exit", + "exit_group", + "faccessat", + "fadvise64", + "fadvise64_64", + "fallocate", + "fanotify_mark", + "fchdir", + "fchmod", + "fchmodat", + "fchown", + "fchown32", + "fchownat", + "fcntl", + "fcntl64", + "fdatasync", + "fgetxattr", + "flistxattr", + "flock", + "fork", + "fremovexattr", + "fsetxattr", + "fstat", + "fstat64", + "fstatat64", + "fstatfs", + "fstatfs64", + "fsync", + "ftruncate", + "ftruncate64", + "futex", + "futimesat", + "getcpu", + "getcwd", + "getdents", + "getdents64", + "getegid", + "getegid32", + "geteuid", + "geteuid32", + "getgid", + "getgid32", + "getgroups", + "getgroups32", + "getitimer", + "getpeername", + "getpgid", + "getpgrp", + "getpid", + "getppid", + "getpriority", + "getrandom", + "getresgid", + "getresgid32", + "getresuid", + "getresuid32", + "getrlimit", + "get_robust_list", + "getrusage", + "getsid", + "getsockname", + "getsockopt", + "get_thread_area", + "gettid", + "gettimeofday", + "getuid", + "getuid32", + "getxattr", + "inotify_add_watch", + "inotify_init", + "inotify_init1", + "inotify_rm_watch", + "io_cancel", + "ioctl", + "io_destroy", + "io_getevents", + "ioprio_get", + "ioprio_set", + "io_setup", + "io_submit", + "ipc", + "kill", + "lchown", + "lchown32", + "lgetxattr", + "link", + "linkat", + "listen", + "listxattr", + "llistxattr", + "_llseek", + "lremovexattr", + "lseek", + "lsetxattr", + "lstat", + "lstat64", + "madvise", + "memfd_create", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlock2", + "mlockall", + "mmap", + "mmap2", + "mprotect", + "mq_getsetattr", + "mq_notify", + "mq_open", + "mq_timedreceive", + "mq_timedsend", + "mq_unlink", + "mremap", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "munlock", + "munlockall", + "munmap", + "nanosleep", + "newfstatat", + "_newselect", + "open", + "openat", + "pause", + "pipe", + "pipe2", + "poll", + "ppoll", + "prctl", + "pread64", + "preadv", + "prlimit64", + "pselect6", + "pwrite64", + "pwritev", + "read", + "readahead", + "readlink", + "readlinkat", + "readv", + "recv", + "recvfrom", + "recvmmsg", + "recvmsg", + "remap_file_pages", + "removexattr", + "rename", + "renameat", + "renameat2", + "restart_syscall", + "rmdir", + "rt_sigaction", + "rt_sigpending", + "rt_sigprocmask", + "rt_sigqueueinfo", + "rt_sigreturn", + "rt_sigsuspend", + "rt_sigtimedwait", + "rt_tgsigqueueinfo", + "sched_getaffinity", + "sched_getattr", + "sched_getparam", + "sched_get_priority_max", + "sched_get_priority_min", + "sched_getscheduler", + "sched_rr_get_interval", + "sched_setaffinity", + "sched_setattr", + "sched_setparam", + "sched_setscheduler", + "sched_yield", + "seccomp", + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "send", + "sendfile", + "sendfile64", + "sendmmsg", + "sendmsg", + "sendto", + "setfsgid", + "setfsgid32", + "setfsuid", + "setfsuid32", + "setgid", + "setgid32", + "setgroups", + "setgroups32", + "setitimer", + "setpgid", + "setpriority", + "setregid", + "setregid32", + "setresgid", + "setresgid32", + "setresuid", + "setresuid32", + "setreuid", + "setreuid32", + "setrlimit", + "set_robust_list", + "setsid", + "setsockopt", + "set_thread_area", + "set_tid_address", + "setuid", + "setuid32", + "setxattr", + "shmat", + "shmctl", + "shmdt", + "shmget", + "shutdown", + "sigaltstack", + "signalfd", + "signalfd4", + "sigreturn", + "socket", + "socketcall", + "socketpair", + "splice", + "stat", + "stat64", + "statfs", + "statfs64", + "symlink", + "symlinkat", + "sync", + "sync_file_range", + "syncfs", + "sysinfo", + "syslog", + "tee", + "tgkill", + "time", + "timer_create", + "timer_delete", + "timerfd_create", + "timerfd_gettime", + "timerfd_settime", + "timer_getoverrun", + "timer_gettime", + "timer_settime", + "times", + "tkill", + "truncate", + "truncate64", + "ugetrlimit", + "umask", + "uname", + "unlink", + "unlinkat", + "utime", + "utimensat", + "utimes", + "vfork", + "vmsplice", + "wait4", + "waitid", + "waitpid", + "write", + "writev", + }, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + { + Names: []string{"personality"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{ + { + Index: 0, + Value: 0x0, + Op: rspec.OpEqualTo, + }, + { + Index: 0, + Value: 0x0008, + Op: rspec.OpEqualTo, + }, + { + Index: 0, + Value: 0xffffffff, + Op: rspec.OpEqualTo, + }, + }, + }, + } + var sysCloneFlagsIndex uint + + capSysAdmin := false + caps := make(map[string]bool) + + for _, cap := range rs.Process.Capabilities.Bounding { + caps[cap] = true + } + for _, cap := range rs.Process.Capabilities.Effective { + caps[cap] = true + } + for _, cap := range rs.Process.Capabilities.Inheritable { + caps[cap] = true + } + for _, cap := range rs.Process.Capabilities.Permitted { + caps[cap] = true + } + for _, cap := range rs.Process.Capabilities.Ambient { + caps[cap] = true + } + + for cap := range caps { + switch cap { + case "CAP_DAC_READ_SEARCH": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{"open_by_handle_at"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_ADMIN": + capSysAdmin = true + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{ + "bpf", + "clone", + "fanotify_init", + "lookup_dcookie", + "mount", + "name_to_handle_at", + "perf_event_open", + "setdomainname", + "sethostname", + "setns", + "umount", + "umount2", + "unshare", + }, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_BOOT": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{"reboot"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_CHROOT": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{"chroot"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_MODULE": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{ + "delete_module", + "init_module", + "finit_module", + "query_module", + }, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_PACCT": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{"acct"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_PTRACE": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{ + "kcmp", + "process_vm_readv", + "process_vm_writev", + "ptrace", + }, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_RAWIO": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{ + "iopl", + "ioperm", + }, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_TIME": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{ + "settimeofday", + "stime", + "adjtimex", + }, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "CAP_SYS_TTY_CONFIG": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{"vhangup"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + } + } + + if !capSysAdmin { + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{"clone"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{ + { + Index: sysCloneFlagsIndex, + Value: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWUSER | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET, + ValueTwo: 0, + Op: rspec.OpMaskedEqual, + }, + }, + }, + }...) + + } + + arch := runtime.GOARCH + switch arch { + case "arm", "arm64": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{ + "breakpoint", + "cacheflush", + "set_tls", + }, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "amd64", "x32": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{"arch_prctl"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + fallthrough + case "x86": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{"modify_ldt"}, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + case "s390", "s390x": + syscalls = append(syscalls, []rspec.LinuxSyscall{ + { + Names: []string{ + "s390_pci_mmio_read", + "s390_pci_mmio_write", + "s390_runtime_instr", + }, + Action: rspec.ActAllow, + Args: []rspec.LinuxSeccompArg{}, + }, + }...) + /* Flags parameter of the clone syscall is the 2nd on s390 */ + } + + return &rspec.LinuxSeccomp{ + DefaultAction: rspec.ActErrno, + Architectures: arches(), + Syscalls: syscalls, + } +} diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/syscall_compare.go b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/syscall_compare.go new file mode 100644 index 000000000..dbf2aec1c --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/seccomp/syscall_compare.go @@ -0,0 +1,140 @@ +package seccomp + +import ( + "fmt" + "reflect" + "strconv" + "strings" + + rspec "github.com/opencontainers/runtime-spec/specs-go" +) + +// Determine if a new syscall rule should be appended, overwrite an existing rule +// or if no action should be taken at all +func decideCourseOfAction(newSyscall *rspec.LinuxSyscall, syscalls []rspec.LinuxSyscall) (string, error) { + ruleForSyscallAlreadyExists := false + + var sliceOfDeterminedActions []string + for i, syscall := range syscalls { + if sameName(&syscall, newSyscall) { + ruleForSyscallAlreadyExists = true + + if identical(newSyscall, &syscall) { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, nothing) + } + + if sameAction(newSyscall, &syscall) { + if bothHaveArgs(newSyscall, &syscall) { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, seccompAppend) + } + if onlyOneHasArgs(newSyscall, &syscall) { + if firstParamOnlyHasArgs(newSyscall, &syscall) { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, "overwrite:"+strconv.Itoa(i)) + } else { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, nothing) + } + } + } + + if !sameAction(newSyscall, &syscall) { + if bothHaveArgs(newSyscall, &syscall) { + if sameArgs(newSyscall, &syscall) { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, "overwrite:"+strconv.Itoa(i)) + } + if !sameArgs(newSyscall, &syscall) { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, seccompAppend) + } + } + if onlyOneHasArgs(newSyscall, &syscall) { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, seccompAppend) + } + if neitherHasArgs(newSyscall, &syscall) { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, "overwrite:"+strconv.Itoa(i)) + } + } + } + } + + if !ruleForSyscallAlreadyExists { + sliceOfDeterminedActions = append(sliceOfDeterminedActions, seccompAppend) + } + + // Nothing has highest priority + for _, determinedAction := range sliceOfDeterminedActions { + if determinedAction == nothing { + return determinedAction, nil + } + } + + // Overwrite has second highest priority + for _, determinedAction := range sliceOfDeterminedActions { + if strings.Contains(determinedAction, seccompOverwrite) { + return determinedAction, nil + } + } + + // Append has the lowest priority + for _, determinedAction := range sliceOfDeterminedActions { + if determinedAction == seccompAppend { + return determinedAction, nil + } + } + + return "", fmt.Errorf("Trouble determining action: %s", sliceOfDeterminedActions) +} + +func hasArguments(config *rspec.LinuxSyscall) bool { + nilSyscall := new(rspec.LinuxSyscall) + return !sameArgs(nilSyscall, config) +} + +func identical(config1, config2 *rspec.LinuxSyscall) bool { + return reflect.DeepEqual(config1, config2) +} + +func identicalExceptAction(config1, config2 *rspec.LinuxSyscall) bool { + samename := sameName(config1, config2) + sameAction := sameAction(config1, config2) + sameArgs := sameArgs(config1, config2) + + return samename && !sameAction && sameArgs +} + +func identicalExceptArgs(config1, config2 *rspec.LinuxSyscall) bool { + samename := sameName(config1, config2) + sameAction := sameAction(config1, config2) + sameArgs := sameArgs(config1, config2) + + return samename && sameAction && !sameArgs +} + +func sameName(config1, config2 *rspec.LinuxSyscall) bool { + return reflect.DeepEqual(config1.Names, config2.Names) +} + +func sameAction(config1, config2 *rspec.LinuxSyscall) bool { + return config1.Action == config2.Action +} + +func sameArgs(config1, config2 *rspec.LinuxSyscall) bool { + return reflect.DeepEqual(config1.Args, config2.Args) +} + +func bothHaveArgs(config1, config2 *rspec.LinuxSyscall) bool { + return hasArguments(config1) && hasArguments(config2) +} + +func onlyOneHasArgs(config1, config2 *rspec.LinuxSyscall) bool { + conf1 := hasArguments(config1) + conf2 := hasArguments(config2) + + return (conf1 && !conf2) || (!conf1 && conf2) +} + +func neitherHasArgs(config1, config2 *rspec.LinuxSyscall) bool { + return !hasArguments(config1) && !hasArguments(config2) +} + +func firstParamOnlyHasArgs(config1, config2 *rspec.LinuxSyscall) bool { + return !hasArguments(config1) && hasArguments(config2) +} diff --git a/vendor/github.com/opencontainers/runtime-tools/generate/spec.go b/vendor/github.com/opencontainers/runtime-tools/generate/spec.go new file mode 100644 index 000000000..519d25448 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/generate/spec.go @@ -0,0 +1,109 @@ +package generate + +import ( + rspec "github.com/opencontainers/runtime-spec/specs-go" +) + +func (g *Generator) initSpec() { + if g.spec == nil { + g.spec = &rspec.Spec{} + } +} + +func (g *Generator) initSpecProcess() { + g.initSpec() + if g.spec.Process == nil { + g.spec.Process = &rspec.Process{} + } +} + +func (g *Generator) initSpecProcessConsoleSize() { + g.initSpecProcess() + if g.spec.Process.ConsoleSize == nil { + g.spec.Process.ConsoleSize = &rspec.Box{} + } +} + +func (g *Generator) initSpecProcessCapabilities() { + g.initSpecProcess() + if g.spec.Process.Capabilities == nil { + g.spec.Process.Capabilities = &rspec.LinuxCapabilities{} + } +} + +func (g *Generator) initSpecRoot() { + g.initSpec() + if g.spec.Root == nil { + g.spec.Root = &rspec.Root{} + } +} + +func (g *Generator) initSpecAnnotations() { + g.initSpec() + if g.spec.Annotations == nil { + g.spec.Annotations = make(map[string]string) + } +} + +func (g *Generator) initSpecHooks() { + g.initSpec() + if g.spec.Hooks == nil { + g.spec.Hooks = &rspec.Hooks{} + } +} + +func (g *Generator) initSpecLinux() { + g.initSpec() + if g.spec.Linux == nil { + g.spec.Linux = &rspec.Linux{} + } +} + +func (g *Generator) initSpecLinuxSysctl() { + g.initSpecLinux() + if g.spec.Linux.Sysctl == nil { + g.spec.Linux.Sysctl = make(map[string]string) + } +} + +func (g *Generator) initSpecLinuxSeccomp() { + g.initSpecLinux() + if g.spec.Linux.Seccomp == nil { + g.spec.Linux.Seccomp = &rspec.LinuxSeccomp{} + } +} + +func (g *Generator) initSpecLinuxResources() { + g.initSpecLinux() + if g.spec.Linux.Resources == nil { + g.spec.Linux.Resources = &rspec.LinuxResources{} + } +} + +func (g *Generator) initSpecLinuxResourcesCPU() { + g.initSpecLinuxResources() + if g.spec.Linux.Resources.CPU == nil { + g.spec.Linux.Resources.CPU = &rspec.LinuxCPU{} + } +} + +func (g *Generator) initSpecLinuxResourcesMemory() { + g.initSpecLinuxResources() + if g.spec.Linux.Resources.Memory == nil { + g.spec.Linux.Resources.Memory = &rspec.LinuxMemory{} + } +} + +func (g *Generator) initSpecLinuxResourcesNetwork() { + g.initSpecLinuxResources() + if g.spec.Linux.Resources.Network == nil { + g.spec.Linux.Resources.Network = &rspec.LinuxNetwork{} + } +} + +func (g *Generator) initSpecLinuxResourcesPids() { + g.initSpecLinuxResources() + if g.spec.Linux.Resources.Pids == nil { + g.spec.Linux.Resources.Pids = &rspec.LinuxPids{} + } +} diff --git a/vendor/github.com/opencontainers/runtime-tools/specerror/error.go b/vendor/github.com/opencontainers/runtime-tools/specerror/error.go new file mode 100644 index 000000000..c75bb6b14 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/specerror/error.go @@ -0,0 +1,170 @@ +// Package specerror implements runtime-spec-specific tooling for +// tracking RFC 2119 violations. +package specerror + +import ( + "fmt" + + "github.com/hashicorp/go-multierror" + rfc2119 "github.com/opencontainers/runtime-tools/error" +) + +const referenceTemplate = "https://github.com/opencontainers/runtime-spec/blob/v%s/%s" + +// Code represents the spec violation, enumerating both +// configuration violations and runtime violations. +type Code int + +const ( + // NonError represents that an input is not an error + NonError Code = iota + // NonRFCError represents that an error is not a rfc2119 error + NonRFCError + + // ConfigFileExistence represents the error code of 'config.json' existence test + ConfigFileExistence + // ArtifactsInSingleDir represents the error code of artifacts place test + ArtifactsInSingleDir + + // SpecVersion represents the error code of specfication version test + SpecVersion + + // RootOnNonHyperV represents the error code of root setting test on non hyper-v containers + RootOnNonHyperV + // RootOnHyperV represents the error code of root setting test on hyper-v containers + RootOnHyperV + // PathFormatOnWindows represents the error code of the path format test on Window + PathFormatOnWindows + // PathName represents the error code of the path name test + PathName + // PathExistence represents the error code of the path existence test + PathExistence + // ReadonlyFilesystem represents the error code of readonly test + ReadonlyFilesystem + // ReadonlyOnWindows represents the error code of readonly setting test on Windows + ReadonlyOnWindows + + // DefaultFilesystems represents the error code of default filesystems test + DefaultFilesystems + + // CreateWithID represents the error code of 'create' lifecyle test with 'id' provided + CreateWithID + // CreateWithUniqueID represents the error code of 'create' lifecyle test with unique 'id' provided + CreateWithUniqueID + // CreateNewContainer represents the error code 'create' lifecyle test that creates new container + CreateNewContainer +) + +type errorTemplate struct { + Level rfc2119.Level + Reference func(version string) (reference string, err error) +} + +// Error represents a runtime-spec violation. +type Error struct { + // Err holds the RFC 2119 violation. + Err rfc2119.Error + + // Code is a matchable holds a Code + Code Code +} + +var ( + containerFormatRef = func(version string) (reference string, err error) { + return fmt.Sprintf(referenceTemplate, version, "bundle.md#container-format"), nil + } + specVersionRef = func(version string) (reference string, err error) { + return fmt.Sprintf(referenceTemplate, version, "config.md#specification-version"), nil + } + rootRef = func(version string) (reference string, err error) { + return fmt.Sprintf(referenceTemplate, version, "config.md#root"), nil + } + defaultFSRef = func(version string) (reference string, err error) { + return fmt.Sprintf(referenceTemplate, version, "config-linux.md#default-filesystems"), nil + } + runtimeCreateRef = func(version string) (reference string, err error) { + return fmt.Sprintf(referenceTemplate, version, "runtime.md#create"), nil + } +) + +var ociErrors = map[Code]errorTemplate{ + // Bundle.md + // Container Format + ConfigFileExistence: {Level: rfc2119.Must, Reference: containerFormatRef}, + ArtifactsInSingleDir: {Level: rfc2119.Must, Reference: containerFormatRef}, + + // Config.md + // Specification Version + SpecVersion: {Level: rfc2119.Must, Reference: specVersionRef}, + // Root + RootOnNonHyperV: {Level: rfc2119.Required, Reference: rootRef}, + RootOnHyperV: {Level: rfc2119.Must, Reference: rootRef}, + // TODO: add tests for 'PathFormatOnWindows' + PathFormatOnWindows: {Level: rfc2119.Must, Reference: rootRef}, + PathName: {Level: rfc2119.Should, Reference: rootRef}, + PathExistence: {Level: rfc2119.Must, Reference: rootRef}, + ReadonlyFilesystem: {Level: rfc2119.Must, Reference: rootRef}, + ReadonlyOnWindows: {Level: rfc2119.Must, Reference: rootRef}, + + // Config-Linux.md + // Default Filesystems + DefaultFilesystems: {Level: rfc2119.Should, Reference: defaultFSRef}, + + // Runtime.md + // Create + CreateWithID: {Level: rfc2119.Must, Reference: runtimeCreateRef}, + CreateWithUniqueID: {Level: rfc2119.Must, Reference: runtimeCreateRef}, + CreateNewContainer: {Level: rfc2119.Must, Reference: runtimeCreateRef}, +} + +// Error returns the error message with specification reference. +func (err *Error) Error() string { + return err.Err.Error() +} + +// NewError creates an Error referencing a spec violation. The error +// can be cast to an *Error for extracting structured information +// about the level of the violation and a reference to the violated +// spec condition. +// +// A version string (for the version of the spec that was violated) +// must be set to get a working URL. +func NewError(code Code, err error, version string) error { + template := ociErrors[code] + reference, err2 := template.Reference(version) + if err2 != nil { + return err2 + } + return &Error{ + Err: rfc2119.Error{ + Level: template.Level, + Reference: reference, + Err: err, + }, + Code: code, + } +} + +// FindError finds an error from a source error (multiple error) and +// returns the error code if found. +// If the source error is nil or empty, return NonError. +// If the source error is not a multiple error, return NonRFCError. +func FindError(err error, code Code) Code { + if err == nil { + return NonError + } + + if merr, ok := err.(*multierror.Error); ok { + if merr.ErrorOrNil() == nil { + return NonError + } + for _, e := range merr.Errors { + if rfcErr, ok := e.(*Error); ok { + if rfcErr.Code == code { + return code + } + } + } + } + return NonRFCError +} diff --git a/vendor/github.com/opencontainers/runtime-tools/validate/validate.go b/vendor/github.com/opencontainers/runtime-tools/validate/validate.go new file mode 100644 index 000000000..bbdb29c60 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-tools/validate/validate.go @@ -0,0 +1,1050 @@ +package validate + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "net" + "os" + "path/filepath" + "reflect" + "regexp" + "runtime" + "strings" + "syscall" + "unicode" + "unicode/utf8" + + "github.com/blang/semver" + "github.com/hashicorp/go-multierror" + rspec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "github.com/syndtr/gocapability/capability" + + "github.com/opencontainers/runtime-tools/specerror" +) + +const specConfig = "config.json" + +var ( + defaultRlimits = []string{ + "RLIMIT_AS", + "RLIMIT_CORE", + "RLIMIT_CPU", + "RLIMIT_DATA", + "RLIMIT_FSIZE", + "RLIMIT_LOCKS", + "RLIMIT_MEMLOCK", + "RLIMIT_MSGQUEUE", + "RLIMIT_NICE", + "RLIMIT_NOFILE", + "RLIMIT_NPROC", + "RLIMIT_RSS", + "RLIMIT_RTPRIO", + "RLIMIT_RTTIME", + "RLIMIT_SIGPENDING", + "RLIMIT_STACK", + } +) + +// Validator represents a validator for runtime bundle +type Validator struct { + spec *rspec.Spec + bundlePath string + HostSpecific bool + platform string +} + +// NewValidator creates a Validator +func NewValidator(spec *rspec.Spec, bundlePath string, hostSpecific bool, platform string) Validator { + if hostSpecific && platform != runtime.GOOS { + platform = runtime.GOOS + } + return Validator{ + spec: spec, + bundlePath: bundlePath, + HostSpecific: hostSpecific, + platform: platform, + } +} + +// NewValidatorFromPath creates a Validator with specified bundle path +func NewValidatorFromPath(bundlePath string, hostSpecific bool, platform string) (Validator, error) { + if hostSpecific && platform != runtime.GOOS { + platform = runtime.GOOS + } + if bundlePath == "" { + return Validator{}, fmt.Errorf("bundle path shouldn't be empty") + } + + if _, err := os.Stat(bundlePath); err != nil { + return Validator{}, err + } + + configPath := filepath.Join(bundlePath, specConfig) + content, err := ioutil.ReadFile(configPath) + if err != nil { + return Validator{}, specerror.NewError(specerror.ConfigFileExistence, err, rspec.Version) + } + if !utf8.Valid(content) { + return Validator{}, fmt.Errorf("%q is not encoded in UTF-8", configPath) + } + var spec rspec.Spec + if err = json.Unmarshal(content, &spec); err != nil { + return Validator{}, err + } + + return NewValidator(&spec, bundlePath, hostSpecific, platform), nil +} + +// CheckAll checks all parts of runtime bundle +func (v *Validator) CheckAll() (errs error) { + errs = multierror.Append(errs, v.CheckPlatform()) + errs = multierror.Append(errs, v.CheckRoot()) + errs = multierror.Append(errs, v.CheckMandatoryFields()) + errs = multierror.Append(errs, v.CheckSemVer()) + errs = multierror.Append(errs, v.CheckMounts()) + errs = multierror.Append(errs, v.CheckProcess()) + errs = multierror.Append(errs, v.CheckHooks()) + errs = multierror.Append(errs, v.CheckLinux()) + + return +} + +// CheckRoot checks status of v.spec.Root +func (v *Validator) CheckRoot() (errs error) { + logrus.Debugf("check root") + + if v.platform == "windows" && v.spec.Windows != nil && v.spec.Windows.HyperV != nil { + if v.spec.Root != nil { + errs = multierror.Append(errs, + specerror.NewError(specerror.RootOnHyperV, fmt.Errorf("for Hyper-V containers, Root must not be set"), rspec.Version)) + return + } + return + } else if v.spec.Root == nil { + errs = multierror.Append(errs, + specerror.NewError(specerror.RootOnNonHyperV, fmt.Errorf("for non-Hyper-V containers, Root must be set"), rspec.Version)) + return + } + + absBundlePath, err := filepath.Abs(v.bundlePath) + if err != nil { + errs = multierror.Append(errs, fmt.Errorf("unable to convert %q to an absolute path", v.bundlePath)) + return + } + + if filepath.Base(v.spec.Root.Path) != "rootfs" { + errs = multierror.Append(errs, + specerror.NewError(specerror.PathName, fmt.Errorf("path name should be the conventional 'rootfs'"), rspec.Version)) + } + + var rootfsPath string + var absRootPath string + if filepath.IsAbs(v.spec.Root.Path) { + rootfsPath = v.spec.Root.Path + absRootPath = filepath.Clean(rootfsPath) + } else { + var err error + rootfsPath = filepath.Join(v.bundlePath, v.spec.Root.Path) + absRootPath, err = filepath.Abs(rootfsPath) + if err != nil { + errs = multierror.Append(errs, fmt.Errorf("unable to convert %q to an absolute path", rootfsPath)) + return + } + } + + if fi, err := os.Stat(rootfsPath); err != nil { + errs = multierror.Append(errs, + specerror.NewError(specerror.PathExistence, fmt.Errorf("cannot find the root path %q", rootfsPath), rspec.Version)) + } else if !fi.IsDir() { + errs = multierror.Append(errs, + specerror.NewError(specerror.PathExistence, fmt.Errorf("root.path %q is not a directory", rootfsPath), rspec.Version)) + } + + rootParent := filepath.Dir(absRootPath) + if absRootPath == string(filepath.Separator) || rootParent != absBundlePath { + errs = multierror.Append(errs, + specerror.NewError(specerror.ArtifactsInSingleDir, fmt.Errorf("root.path is %q, but it MUST be a child of %q", v.spec.Root.Path, absBundlePath), rspec.Version)) + } + + if v.platform == "windows" { + if v.spec.Root.Readonly { + errs = multierror.Append(errs, + specerror.NewError(specerror.ReadonlyOnWindows, fmt.Errorf("root.readonly field MUST be omitted or false when target platform is windows"), rspec.Version)) + } + } + + return +} + +// CheckSemVer checks v.spec.Version +func (v *Validator) CheckSemVer() (errs error) { + logrus.Debugf("check semver") + + version := v.spec.Version + _, err := semver.Parse(version) + if err != nil { + errs = multierror.Append(errs, + specerror.NewError(specerror.SpecVersion, fmt.Errorf("%q is not valid SemVer: %s", version, err.Error()), rspec.Version)) + } + if version != rspec.Version { + errs = multierror.Append(errs, fmt.Errorf("validate currently only handles version %s, but the supplied configuration targets %s", rspec.Version, version)) + } + + return +} + +// CheckHooks check v.spec.Hooks +func (v *Validator) CheckHooks() (errs error) { + logrus.Debugf("check hooks") + + if v.spec.Hooks != nil { + errs = multierror.Append(errs, checkEventHooks("pre-start", v.spec.Hooks.Prestart, v.HostSpecific)) + errs = multierror.Append(errs, checkEventHooks("post-start", v.spec.Hooks.Poststart, v.HostSpecific)) + errs = multierror.Append(errs, checkEventHooks("post-stop", v.spec.Hooks.Poststop, v.HostSpecific)) + } + + return +} + +func checkEventHooks(hookType string, hooks []rspec.Hook, hostSpecific bool) (errs error) { + for _, hook := range hooks { + if !filepath.IsAbs(hook.Path) { + errs = multierror.Append(errs, fmt.Errorf("the %s hook %v: is not absolute path", hookType, hook.Path)) + } + + if hostSpecific { + fi, err := os.Stat(hook.Path) + if err != nil { + errs = multierror.Append(errs, fmt.Errorf("cannot find %s hook: %v", hookType, hook.Path)) + } + if fi.Mode()&0111 == 0 { + errs = multierror.Append(errs, fmt.Errorf("the %s hook %v: is not executable", hookType, hook.Path)) + } + } + + for _, env := range hook.Env { + if !envValid(env) { + errs = multierror.Append(errs, fmt.Errorf("env %q for hook %v is in the invalid form", env, hook.Path)) + } + } + } + + return +} + +// CheckProcess checks v.spec.Process +func (v *Validator) CheckProcess() (errs error) { + logrus.Debugf("check process") + + if v.spec.Process == nil { + return + } + + process := v.spec.Process + if !filepath.IsAbs(process.Cwd) { + errs = multierror.Append(errs, fmt.Errorf("cwd %q is not an absolute path", process.Cwd)) + } + + for _, env := range process.Env { + if !envValid(env) { + errs = multierror.Append(errs, fmt.Errorf("env %q should be in the form of 'key=value'. The left hand side must consist solely of letters, digits, and underscores '_'", env)) + } + } + + if len(process.Args) == 0 { + errs = multierror.Append(errs, fmt.Errorf("args must not be empty")) + } else { + if filepath.IsAbs(process.Args[0]) { + var rootfsPath string + if filepath.IsAbs(v.spec.Root.Path) { + rootfsPath = v.spec.Root.Path + } else { + rootfsPath = filepath.Join(v.bundlePath, v.spec.Root.Path) + } + absPath := filepath.Join(rootfsPath, process.Args[0]) + fileinfo, err := os.Stat(absPath) + if os.IsNotExist(err) { + logrus.Warnf("executable %q is not available in rootfs currently", process.Args[0]) + } else if err != nil { + errs = multierror.Append(errs, err) + } else { + m := fileinfo.Mode() + if m.IsDir() || m&0111 == 0 { + errs = multierror.Append(errs, fmt.Errorf("arg %q is not executable", process.Args[0])) + } + } + } + } + + if v.spec.Process.Capabilities != nil { + errs = multierror.Append(errs, v.CheckCapabilities()) + } + errs = multierror.Append(errs, v.CheckRlimits()) + + if v.platform == "linux" { + if len(process.ApparmorProfile) > 0 { + profilePath := filepath.Join(v.bundlePath, v.spec.Root.Path, "/etc/apparmor.d", process.ApparmorProfile) + _, err := os.Stat(profilePath) + if err != nil { + errs = multierror.Append(errs, err) + } + } + } + + return +} + +// CheckCapabilities checks v.spec.Process.Capabilities +func (v *Validator) CheckCapabilities() (errs error) { + process := v.spec.Process + if v.platform == "linux" { + var effective, permitted, inheritable, ambient bool + caps := make(map[string][]string) + + for _, cap := range process.Capabilities.Bounding { + caps[cap] = append(caps[cap], "bounding") + } + for _, cap := range process.Capabilities.Effective { + caps[cap] = append(caps[cap], "effective") + } + for _, cap := range process.Capabilities.Inheritable { + caps[cap] = append(caps[cap], "inheritable") + } + for _, cap := range process.Capabilities.Permitted { + caps[cap] = append(caps[cap], "permitted") + } + for _, cap := range process.Capabilities.Ambient { + caps[cap] = append(caps[cap], "ambient") + } + + for capability, owns := range caps { + if err := CapValid(capability, v.HostSpecific); err != nil { + errs = multierror.Append(errs, fmt.Errorf("capability %q is not valid, man capabilities(7)", capability)) + } + + effective, permitted, ambient, inheritable = false, false, false, false + for _, set := range owns { + if set == "effective" { + effective = true + continue + } + if set == "inheritable" { + inheritable = true + continue + } + if set == "permitted" { + permitted = true + continue + } + if set == "ambient" { + ambient = true + continue + } + } + if effective && !permitted { + errs = multierror.Append(errs, fmt.Errorf("effective capability %q is not allowed, as it's not permitted", capability)) + } + if ambient && !(effective && inheritable) { + errs = multierror.Append(errs, fmt.Errorf("ambient capability %q is not allowed, as it's not permitted and inheribate", capability)) + } + } + } else { + logrus.Warnf("process.capabilities validation not yet implemented for OS %q", v.platform) + } + + return +} + +// CheckRlimits checks v.spec.Process.Rlimits +func (v *Validator) CheckRlimits() (errs error) { + process := v.spec.Process + for index, rlimit := range process.Rlimits { + for i := index + 1; i < len(process.Rlimits); i++ { + if process.Rlimits[index].Type == process.Rlimits[i].Type { + errs = multierror.Append(errs, fmt.Errorf("rlimit can not contain the same type %q", process.Rlimits[index].Type)) + } + } + errs = multierror.Append(errs, v.rlimitValid(rlimit)) + } + + return +} + +func supportedMountTypes(OS string, hostSpecific bool) (map[string]bool, error) { + supportedTypes := make(map[string]bool) + + if OS != "linux" && OS != "windows" { + logrus.Warnf("%v is not supported to check mount type", OS) + return nil, nil + } else if OS == "windows" { + supportedTypes["ntfs"] = true + return supportedTypes, nil + } + + if hostSpecific { + f, err := os.Open("/proc/filesystems") + if err != nil { + return nil, err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return supportedTypes, err + } + + text := s.Text() + parts := strings.Split(text, "\t") + if len(parts) > 1 { + supportedTypes[parts[1]] = true + } else { + supportedTypes[parts[0]] = true + } + } + + supportedTypes["bind"] = true + + return supportedTypes, nil + } + logrus.Warn("Checking linux mount types without --host-specific is not supported yet") + return nil, nil +} + +// CheckMounts checks v.spec.Mounts +func (v *Validator) CheckMounts() (errs error) { + logrus.Debugf("check mounts") + + supportedTypes, err := supportedMountTypes(v.platform, v.HostSpecific) + if err != nil { + errs = multierror.Append(errs, err) + return + } + + for i, mountA := range v.spec.Mounts { + if supportedTypes != nil && !supportedTypes[mountA.Type] { + errs = multierror.Append(errs, fmt.Errorf("unsupported mount type %q", mountA.Type)) + } + if v.platform == "windows" { + if err := pathValid(v.platform, mountA.Destination); err != nil { + errs = multierror.Append(errs, err) + } + if err := pathValid(v.platform, mountA.Source); err != nil { + errs = multierror.Append(errs, err) + } + } else { + if err := pathValid(v.platform, mountA.Destination); err != nil { + errs = multierror.Append(errs, err) + } + } + for j, mountB := range v.spec.Mounts { + if i == j { + continue + } + // whether B.Desination is nested within A.Destination + nested, err := nestedValid(v.platform, mountA.Destination, mountB.Destination) + if err != nil { + errs = multierror.Append(errs, err) + continue + } + if nested { + if v.platform == "windows" && i < j { + errs = multierror.Append(errs, fmt.Errorf("on Windows, %v nested within %v is forbidden", mountB.Destination, mountA.Destination)) + } + if i > j { + logrus.Warnf("%v will be covered by %v", mountB.Destination, mountA.Destination) + } + } + } + } + + return +} + +// CheckPlatform checks v.platform +func (v *Validator) CheckPlatform() (errs error) { + logrus.Debugf("check platform") + + if v.platform != "linux" && v.platform != "solaris" && v.platform != "windows" { + errs = multierror.Append(errs, fmt.Errorf("platform %q is not supported", v.platform)) + return + } + + if v.platform == "windows" { + if v.spec.Windows == nil { + errs = multierror.Append(errs, errors.New("'windows' MUST be set when platform is `windows`")) + } + } + + return +} + +// CheckLinux checks v.spec.Linux +func (v *Validator) CheckLinux() (errs error) { + logrus.Debugf("check linux") + + if v.spec.Linux == nil { + return + } + + var nsTypeList = map[rspec.LinuxNamespaceType]struct { + num int + newExist bool + }{ + rspec.PIDNamespace: {0, false}, + rspec.NetworkNamespace: {0, false}, + rspec.MountNamespace: {0, false}, + rspec.IPCNamespace: {0, false}, + rspec.UTSNamespace: {0, false}, + rspec.UserNamespace: {0, false}, + rspec.CgroupNamespace: {0, false}, + } + + for index := 0; index < len(v.spec.Linux.Namespaces); index++ { + ns := v.spec.Linux.Namespaces[index] + if !namespaceValid(ns) { + errs = multierror.Append(errs, fmt.Errorf("namespace %v is invalid", ns)) + } + + tmpItem := nsTypeList[ns.Type] + tmpItem.num = tmpItem.num + 1 + if tmpItem.num > 1 { + errs = multierror.Append(errs, fmt.Errorf("duplicated namespace %q", ns.Type)) + } + + if len(ns.Path) == 0 { + tmpItem.newExist = true + } + nsTypeList[ns.Type] = tmpItem + } + + if (len(v.spec.Linux.UIDMappings) > 0 || len(v.spec.Linux.GIDMappings) > 0) && !nsTypeList[rspec.UserNamespace].newExist { + errs = multierror.Append(errs, errors.New("the UID/GID mappings requires a new User namespace to be specified as well")) + } else if len(v.spec.Linux.UIDMappings) > 5 { + errs = multierror.Append(errs, errors.New("only 5 UID mappings are allowed (linux kernel restriction)")) + } else if len(v.spec.Linux.GIDMappings) > 5 { + errs = multierror.Append(errs, errors.New("only 5 GID mappings are allowed (linux kernel restriction)")) + } + + for k := range v.spec.Linux.Sysctl { + if strings.HasPrefix(k, "net.") && !nsTypeList[rspec.NetworkNamespace].newExist { + errs = multierror.Append(errs, fmt.Errorf("sysctl %v requires a new Network namespace to be specified as well", k)) + } + if strings.HasPrefix(k, "fs.mqueue.") { + if !nsTypeList[rspec.MountNamespace].newExist || !nsTypeList[rspec.IPCNamespace].newExist { + errs = multierror.Append(errs, fmt.Errorf("sysctl %v requires a new IPC namespace and Mount namespace to be specified as well", k)) + } + } + } + + if v.platform == "linux" && !nsTypeList[rspec.UTSNamespace].newExist && v.spec.Hostname != "" { + errs = multierror.Append(errs, fmt.Errorf("on Linux, hostname requires a new UTS namespace to be specified as well")) + } + + // Linux devices validation + devList := make(map[string]bool) + devTypeList := make(map[string]bool) + for index := 0; index < len(v.spec.Linux.Devices); index++ { + device := v.spec.Linux.Devices[index] + if !deviceValid(device) { + errs = multierror.Append(errs, fmt.Errorf("device %v is invalid", device)) + } + + if _, exists := devList[device.Path]; exists { + errs = multierror.Append(errs, fmt.Errorf("device %s is duplicated", device.Path)) + } else { + var rootfsPath string + if filepath.IsAbs(v.spec.Root.Path) { + rootfsPath = v.spec.Root.Path + } else { + rootfsPath = filepath.Join(v.bundlePath, v.spec.Root.Path) + } + absPath := filepath.Join(rootfsPath, device.Path) + fi, err := os.Stat(absPath) + if os.IsNotExist(err) { + devList[device.Path] = true + } else if err != nil { + errs = multierror.Append(errs, err) + } else { + fStat, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + errs = multierror.Append(errs, fmt.Errorf("cannot determine state for device %s", device.Path)) + continue + } + var devType string + switch fStat.Mode & syscall.S_IFMT { + case syscall.S_IFCHR: + devType = "c" + case syscall.S_IFBLK: + devType = "b" + case syscall.S_IFIFO: + devType = "p" + default: + devType = "unmatched" + } + if devType != device.Type || (devType == "c" && device.Type == "u") { + errs = multierror.Append(errs, fmt.Errorf("unmatched %s already exists in filesystem", device.Path)) + continue + } + if devType != "p" { + dev := fStat.Rdev + major := (dev >> 8) & 0xfff + minor := (dev & 0xff) | ((dev >> 12) & 0xfff00) + if int64(major) != device.Major || int64(minor) != device.Minor { + errs = multierror.Append(errs, fmt.Errorf("unmatched %s already exists in filesystem", device.Path)) + continue + } + } + if device.FileMode != nil { + expectedPerm := *device.FileMode & os.ModePerm + actualPerm := fi.Mode() & os.ModePerm + if expectedPerm != actualPerm { + errs = multierror.Append(errs, fmt.Errorf("unmatched %s already exists in filesystem", device.Path)) + continue + } + } + if device.UID != nil { + if *device.UID != fStat.Uid { + errs = multierror.Append(errs, fmt.Errorf("unmatched %s already exists in filesystem", device.Path)) + continue + } + } + if device.GID != nil { + if *device.GID != fStat.Gid { + errs = multierror.Append(errs, fmt.Errorf("unmatched %s already exists in filesystem", device.Path)) + continue + } + } + } + } + + // unify u->c when comparing, they are synonyms + var devID string + if device.Type == "u" { + devID = fmt.Sprintf("%s:%d:%d", "c", device.Major, device.Minor) + } else { + devID = fmt.Sprintf("%s:%d:%d", device.Type, device.Major, device.Minor) + } + + if _, exists := devTypeList[devID]; exists { + logrus.Warnf("type:%s, major:%d and minor:%d for linux devices is duplicated", device.Type, device.Major, device.Minor) + } else { + devTypeList[devID] = true + } + } + + if v.spec.Linux.Resources != nil { + errs = multierror.Append(errs, v.CheckLinuxResources()) + } + + if v.spec.Linux.Seccomp != nil { + errs = multierror.Append(errs, v.CheckSeccomp()) + } + + switch v.spec.Linux.RootfsPropagation { + case "": + case "private": + case "rprivate": + case "slave": + case "rslave": + case "shared": + case "rshared": + case "unbindable": + case "runbindable": + default: + errs = multierror.Append(errs, errors.New("rootfsPropagation must be empty or one of \"private|rprivate|slave|rslave|shared|rshared|unbindable|runbindable\"")) + } + + for _, maskedPath := range v.spec.Linux.MaskedPaths { + if !strings.HasPrefix(maskedPath, "/") { + errs = multierror.Append(errs, fmt.Errorf("maskedPath %v is not an absolute path", maskedPath)) + } + } + + for _, readonlyPath := range v.spec.Linux.ReadonlyPaths { + if !strings.HasPrefix(readonlyPath, "/") { + errs = multierror.Append(errs, fmt.Errorf("readonlyPath %v is not an absolute path", readonlyPath)) + } + } + + return +} + +// CheckLinuxResources checks v.spec.Linux.Resources +func (v *Validator) CheckLinuxResources() (errs error) { + logrus.Debugf("check linux resources") + + r := v.spec.Linux.Resources + if r.Memory != nil { + if r.Memory.Limit != nil && r.Memory.Swap != nil && uint64(*r.Memory.Limit) > uint64(*r.Memory.Swap) { + errs = multierror.Append(errs, fmt.Errorf("minimum memoryswap should be larger than memory limit")) + } + if r.Memory.Limit != nil && r.Memory.Reservation != nil && uint64(*r.Memory.Reservation) > uint64(*r.Memory.Limit) { + errs = multierror.Append(errs, fmt.Errorf("minimum memory limit should be larger than memory reservation")) + } + } + if r.Network != nil && v.HostSpecific { + var exist bool + interfaces, err := net.Interfaces() + if err != nil { + errs = multierror.Append(errs, err) + return + } + for _, prio := range r.Network.Priorities { + exist = false + for _, ni := range interfaces { + if prio.Name == ni.Name { + exist = true + break + } + } + if !exist { + errs = multierror.Append(errs, fmt.Errorf("interface %s does not exist currently", prio.Name)) + } + } + } + for index := 0; index < len(r.Devices); index++ { + switch r.Devices[index].Type { + case "a", "b", "c": + default: + errs = multierror.Append(errs, fmt.Errorf("type of devices %s is invalid", r.Devices[index].Type)) + } + + access := []byte(r.Devices[index].Access) + for i := 0; i < len(access); i++ { + switch access[i] { + case 'r', 'w', 'm': + default: + errs = multierror.Append(errs, fmt.Errorf("access %s is invalid", r.Devices[index].Access)) + return + } + } + } + + return +} + +// CheckSeccomp checkc v.spec.Linux.Seccomp +func (v *Validator) CheckSeccomp() (errs error) { + logrus.Debugf("check linux seccomp") + + s := v.spec.Linux.Seccomp + if !seccompActionValid(s.DefaultAction) { + errs = multierror.Append(errs, fmt.Errorf("seccomp defaultAction %q is invalid", s.DefaultAction)) + } + for index := 0; index < len(s.Syscalls); index++ { + if !syscallValid(s.Syscalls[index]) { + errs = multierror.Append(errs, fmt.Errorf("syscall %v is invalid", s.Syscalls[index])) + } + } + for index := 0; index < len(s.Architectures); index++ { + switch s.Architectures[index] { + case rspec.ArchX86: + case rspec.ArchX86_64: + case rspec.ArchX32: + case rspec.ArchARM: + case rspec.ArchAARCH64: + case rspec.ArchMIPS: + case rspec.ArchMIPS64: + case rspec.ArchMIPS64N32: + case rspec.ArchMIPSEL: + case rspec.ArchMIPSEL64: + case rspec.ArchMIPSEL64N32: + case rspec.ArchPPC: + case rspec.ArchPPC64: + case rspec.ArchPPC64LE: + case rspec.ArchS390: + case rspec.ArchS390X: + case rspec.ArchPARISC: + case rspec.ArchPARISC64: + default: + errs = multierror.Append(errs, fmt.Errorf("seccomp architecture %q is invalid", s.Architectures[index])) + } + } + + return +} + +// CapValid checks whether a capability is valid +func CapValid(c string, hostSpecific bool) error { + isValid := false + + if !strings.HasPrefix(c, "CAP_") { + return fmt.Errorf("capability %s must start with CAP_", c) + } + for _, cap := range capability.List() { + if c == fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String())) { + if hostSpecific && cap > LastCap() { + return fmt.Errorf("%s is not supported on the current host", c) + } + isValid = true + break + } + } + + if !isValid { + return fmt.Errorf("invalid capability: %s", c) + } + return nil +} + +// LastCap return last cap of system +func LastCap() capability.Cap { + last := capability.CAP_LAST_CAP + // hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap + if last == capability.Cap(63) { + last = capability.CAP_BLOCK_SUSPEND + } + + return last +} + +func envValid(env string) bool { + items := strings.Split(env, "=") + if len(items) < 2 { + return false + } + for i, ch := range strings.TrimSpace(items[0]) { + if !unicode.IsDigit(ch) && !unicode.IsLetter(ch) && ch != '_' { + return false + } + if i == 0 && unicode.IsDigit(ch) { + logrus.Warnf("Env %v: variable name beginning with digit is not recommended.", env) + } + } + return true +} + +func (v *Validator) rlimitValid(rlimit rspec.POSIXRlimit) (errs error) { + if rlimit.Hard < rlimit.Soft { + errs = multierror.Append(errs, fmt.Errorf("hard limit of rlimit %s should not be less than soft limit", rlimit.Type)) + } + + if v.platform == "linux" { + for _, val := range defaultRlimits { + if val == rlimit.Type { + return + } + } + errs = multierror.Append(errs, fmt.Errorf("rlimit type %q is invalid", rlimit.Type)) + } else { + logrus.Warnf("process.rlimits validation not yet implemented for platform %q", v.platform) + } + + return +} + +func namespaceValid(ns rspec.LinuxNamespace) bool { + switch ns.Type { + case rspec.PIDNamespace: + case rspec.NetworkNamespace: + case rspec.MountNamespace: + case rspec.IPCNamespace: + case rspec.UTSNamespace: + case rspec.UserNamespace: + case rspec.CgroupNamespace: + default: + return false + } + + if ns.Path != "" && !filepath.IsAbs(ns.Path) { + return false + } + + return true +} + +func pathValid(os, path string) error { + if os == "windows" { + matched, err := regexp.MatchString("^[a-zA-Z]:(\\\\[^\\\\/<>|:*?\"]+)+$", path) + if err != nil { + return err + } + if !matched { + return fmt.Errorf("invalid windows path %v", path) + } + return nil + } + if !filepath.IsAbs(path) { + return fmt.Errorf("%v is not an absolute path", path) + } + return nil +} + +// Check whether pathB is nested whithin pathA +func nestedValid(os, pathA, pathB string) (bool, error) { + if pathA == pathB { + return false, nil + } + if pathA == "/" && pathB != "" { + return true, nil + } + + var sep string + if os == "windows" { + sep = "\\" + } else { + sep = "/" + } + + splitedPathA := strings.Split(filepath.Clean(pathA), sep) + splitedPathB := strings.Split(filepath.Clean(pathB), sep) + lenA := len(splitedPathA) + lenB := len(splitedPathB) + + if lenA > lenB { + if (lenA - lenB) == 1 { + // if pathA is longer but not end with separator + if splitedPathA[lenA-1] != "" { + return false, nil + } + splitedPathA = splitedPathA[:lenA-1] + } else { + return false, nil + } + } + + for i, partA := range splitedPathA { + if partA != splitedPathB[i] { + return false, nil + } + } + + return true, nil +} + +func deviceValid(d rspec.LinuxDevice) bool { + switch d.Type { + case "b", "c", "u": + if d.Major <= 0 || d.Minor <= 0 { + return false + } + case "p": + if d.Major > 0 || d.Minor > 0 { + return false + } + default: + return false + } + return true +} + +func seccompActionValid(secc rspec.LinuxSeccompAction) bool { + switch secc { + case "": + case rspec.ActKill: + case rspec.ActTrap: + case rspec.ActErrno: + case rspec.ActTrace: + case rspec.ActAllow: + default: + return false + } + return true +} + +func syscallValid(s rspec.LinuxSyscall) bool { + if !seccompActionValid(s.Action) { + return false + } + for index := 0; index < len(s.Args); index++ { + arg := s.Args[index] + switch arg.Op { + case rspec.OpNotEqual: + case rspec.OpLessThan: + case rspec.OpLessEqual: + case rspec.OpEqualTo: + case rspec.OpGreaterEqual: + case rspec.OpGreaterThan: + case rspec.OpMaskedEqual: + default: + return false + } + } + return true +} + +func isStruct(t reflect.Type) bool { + return t.Kind() == reflect.Struct +} + +func isStructPtr(t reflect.Type) bool { + return t.Kind() == reflect.Ptr && t.Elem().Kind() == reflect.Struct +} + +func checkMandatoryUnit(field reflect.Value, tagField reflect.StructField, parent string) (errs error) { + mandatory := !strings.Contains(tagField.Tag.Get("json"), "omitempty") + switch field.Kind() { + case reflect.Ptr: + if mandatory && field.IsNil() { + errs = multierror.Append(errs, fmt.Errorf("'%s.%s' should not be empty", parent, tagField.Name)) + } + case reflect.String: + if mandatory && (field.Len() == 0) { + errs = multierror.Append(errs, fmt.Errorf("'%s.%s' should not be empty", parent, tagField.Name)) + } + case reflect.Slice: + if mandatory && (field.IsNil() || field.Len() == 0) { + errs = multierror.Append(errs, fmt.Errorf("'%s.%s' should not be empty", parent, tagField.Name)) + return + } + for index := 0; index < field.Len(); index++ { + mValue := field.Index(index) + if mValue.CanInterface() { + errs = multierror.Append(errs, checkMandatory(mValue.Interface())) + } + } + case reflect.Map: + if mandatory && (field.IsNil() || field.Len() == 0) { + errs = multierror.Append(errs, fmt.Errorf("'%s.%s' should not be empty", parent, tagField.Name)) + return + } + keys := field.MapKeys() + for index := 0; index < len(keys); index++ { + mValue := field.MapIndex(keys[index]) + if mValue.CanInterface() { + errs = multierror.Append(errs, checkMandatory(mValue.Interface())) + } + } + default: + } + + return +} + +func checkMandatory(obj interface{}) (errs error) { + objT := reflect.TypeOf(obj) + objV := reflect.ValueOf(obj) + if isStructPtr(objT) { + objT = objT.Elem() + objV = objV.Elem() + } else if !isStruct(objT) { + return + } + + for i := 0; i < objT.NumField(); i++ { + t := objT.Field(i).Type + if isStructPtr(t) && objV.Field(i).IsNil() { + if !strings.Contains(objT.Field(i).Tag.Get("json"), "omitempty") { + errs = multierror.Append(errs, fmt.Errorf("'%s.%s' should not be empty", objT.Name(), objT.Field(i).Name)) + } + } else if (isStruct(t) || isStructPtr(t)) && objV.Field(i).CanInterface() { + errs = multierror.Append(errs, checkMandatory(objV.Field(i).Interface())) + } else { + errs = multierror.Append(errs, checkMandatoryUnit(objV.Field(i), objT.Field(i), objT.Name())) + } + + } + return +} + +// CheckMandatoryFields checks mandatory field of container's config file +func (v *Validator) CheckMandatoryFields() error { + logrus.Debugf("check mandatory fields") + + return checkMandatory(v.spec) +} diff --git a/vendor/github.com/opencontainers/selinux/LICENSE b/vendor/github.com/opencontainers/selinux/LICENSE new file mode 100644 index 000000000..8dada3eda --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/selinux/README.md b/vendor/github.com/opencontainers/selinux/README.md new file mode 100644 index 000000000..043a92937 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/README.md @@ -0,0 +1,7 @@ +# selinux + +[![GoDoc](https://godoc.org/github.com/opencontainers/selinux?status.svg)](https://godoc.org/github.com/opencontainers/selinux) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/selinux)](https://goreportcard.com/report/github.com/opencontainers/selinux) [![Build Status](https://travis-ci.org/opencontainers/selinux.svg?branch=master)](https://travis-ci.org/opencontainers/selinux) + +Common SELinux package used across the container ecosystem. + +Please see the [godoc](https://godoc.org/github.com/opencontainers/selinux) for more information. diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go new file mode 100644 index 000000000..6cfc5fded --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go @@ -0,0 +1,84 @@ +// +build !selinux !linux + +package label + +// InitLabels returns the process label and file labels to be used within +// the container. A list of options can be passed into this function to alter +// the labels. +func InitLabels(options []string) (string, string, error) { + return "", "", nil +} + +func GetROMountLabel() string { + return "" +} + +func GenLabels(options string) (string, string, error) { + return "", "", nil +} + +func FormatMountLabel(src string, mountLabel string) string { + return src +} + +func SetProcessLabel(processLabel string) error { + return nil +} + +func GetFileLabel(path string) (string, error) { + return "", nil +} + +func SetFileLabel(path string, fileLabel string) error { + return nil +} + +func SetFileCreateLabel(fileLabel string) error { + return nil +} + +func Relabel(path string, fileLabel string, shared bool) error { + return nil +} + +func GetPidLabel(pid int) (string, error) { + return "", nil +} + +func Init() { +} + +func ReserveLabel(label string) error { + return nil +} + +func ReleaseLabel(label string) error { + return nil +} + +// DupSecOpt takes a process label and returns security options that +// can be used to set duplicate labels on future container processes +func DupSecOpt(src string) []string { + return nil +} + +// DisableSecOpt returns a security opt that can disable labeling +// support for future container processes +func DisableSecOpt() []string { + return nil +} + +// Validate checks that the label does not include unexpected options +func Validate(label string) error { + return nil +} + +// RelabelNeeded checks whether the user requested a relabel +func RelabelNeeded(label string) bool { + return false +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(label string) bool { + return false +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_selinux.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_selinux.go new file mode 100644 index 000000000..c008a387b --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_selinux.go @@ -0,0 +1,206 @@ +// +build selinux,linux + +package label + +import ( + "fmt" + "strings" + + "github.com/opencontainers/selinux/go-selinux" +) + +// Valid Label Options +var validOptions = map[string]bool{ + "disable": true, + "type": true, + "user": true, + "role": true, + "level": true, +} + +var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together") + +// InitLabels returns the process label and file labels to be used within +// the container. A list of options can be passed into this function to alter +// the labels. The labels returned will include a random MCS String, that is +// guaranteed to be unique. +func InitLabels(options []string) (string, string, error) { + if !selinux.GetEnabled() { + return "", "", nil + } + processLabel, mountLabel := selinux.ContainerLabels() + if processLabel != "" { + pcon := selinux.NewContext(processLabel) + mcon := selinux.NewContext(mountLabel) + for _, opt := range options { + if opt == "disable" { + return "", "", nil + } + if i := strings.Index(opt, ":"); i == -1 { + return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type' followed by ':' and a value", opt) + } + con := strings.SplitN(opt, ":", 2) + if !validOptions[con[0]] { + return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type'", con[0]) + + } + pcon[con[0]] = con[1] + if con[0] == "level" || con[0] == "user" { + mcon[con[0]] = con[1] + } + } + _ = ReleaseLabel(processLabel) + processLabel = pcon.Get() + mountLabel = mcon.Get() + _ = ReserveLabel(processLabel) + } + return processLabel, mountLabel, nil +} + +func ROMountLabel() string { + return selinux.ROFileLabel() +} + +// DEPRECATED: The GenLabels function is only to be used during the transition to the official API. +func GenLabels(options string) (string, string, error) { + return InitLabels(strings.Fields(options)) +} + +// FormatMountLabel returns a string to be used by the mount command. +// The format of this string will be used to alter the labeling of the mountpoint. +// The string returned is suitable to be used as the options field of the mount command. +// If you need to have additional mount point options, you can pass them in as +// the first parameter. Second parameter is the label that you wish to apply +// to all content in the mount point. +func FormatMountLabel(src, mountLabel string) string { + if mountLabel != "" { + switch src { + case "": + src = fmt.Sprintf("context=%q", mountLabel) + default: + src = fmt.Sprintf("%s,context=%q", src, mountLabel) + } + } + return src +} + +// SetProcessLabel takes a process label and tells the kernel to assign the +// label to the next program executed by the current process. +func SetProcessLabel(processLabel string) error { + if processLabel == "" { + return nil + } + return selinux.SetExecLabel(processLabel) +} + +// ProcessLabel returns the process label that the kernel will assign +// to the next program executed by the current process. If "" is returned +// this indicates that the default labeling will happen for the process. +func ProcessLabel() (string, error) { + return selinux.ExecLabel() +} + +// GetFileLabel returns the label for specified path +func FileLabel(path string) (string, error) { + return selinux.FileLabel(path) +} + +// SetFileLabel modifies the "path" label to the specified file label +func SetFileLabel(path string, fileLabel string) error { + if selinux.GetEnabled() && fileLabel != "" { + return selinux.SetFileLabel(path, fileLabel) + } + return nil +} + +// SetFileCreateLabel tells the kernel the label for all files to be created +func SetFileCreateLabel(fileLabel string) error { + if selinux.GetEnabled() { + return selinux.SetFSCreateLabel(fileLabel) + } + return nil +} + +// Relabel changes the label of path to the filelabel string. +// It changes the MCS label to s0 if shared is true. +// This will allow all containers to share the content. +func Relabel(path string, fileLabel string, shared bool) error { + if !selinux.GetEnabled() { + return nil + } + + if fileLabel == "" { + return nil + } + + exclude_paths := map[string]bool{"/": true, "/usr": true, "/etc": true} + if exclude_paths[path] { + return fmt.Errorf("SELinux relabeling of %s is not allowed", path) + } + + if shared { + c := selinux.NewContext(fileLabel) + c["level"] = "s0" + fileLabel = c.Get() + } + if err := selinux.Chcon(path, fileLabel, true); err != nil { + return err + } + return nil +} + +// PidLabel will return the label of the process running with the specified pid +func PidLabel(pid int) (string, error) { + return selinux.PidLabel(pid) +} + +// Init initialises the labeling system +func Init() { + selinux.GetEnabled() +} + +// ReserveLabel will record the fact that the MCS label has already been used. +// This will prevent InitLabels from using the MCS label in a newly created +// container +func ReserveLabel(label string) error { + selinux.ReserveLabel(label) + return nil +} + +// ReleaseLabel will remove the reservation of the MCS label. +// This will allow InitLabels to use the MCS label in a newly created +// containers +func ReleaseLabel(label string) error { + selinux.ReleaseLabel(label) + return nil +} + +// DupSecOpt takes a process label and returns security options that +// can be used to set duplicate labels on future container processes +func DupSecOpt(src string) []string { + return selinux.DupSecOpt(src) +} + +// DisableSecOpt returns a security opt that can disable labeling +// support for future container processes +func DisableSecOpt() []string { + return selinux.DisableSecOpt() +} + +// Validate checks that the label does not include unexpected options +func Validate(label string) error { + if strings.Contains(label, "z") && strings.Contains(label, "Z") { + return ErrIncompatibleLabel + } + return nil +} + +// RelabelNeeded checks whether the user requested a relabel +func RelabelNeeded(label string) bool { + return strings.Contains(label, "z") || strings.Contains(label, "Z") +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(label string) bool { + return strings.Contains(label, "z") +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go new file mode 100644 index 000000000..de9316c2e --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go @@ -0,0 +1,593 @@ +// +build linux + +package selinux + +import ( + "bufio" + "crypto/rand" + "encoding/binary" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "sync" + "syscall" +) + +const ( + // Enforcing constant indicate SELinux is in enforcing mode + Enforcing = 1 + // Permissive constant to indicate SELinux is in permissive mode + Permissive = 0 + // Disabled constant to indicate SELinux is disabled + Disabled = -1 + selinuxDir = "/etc/selinux/" + selinuxConfig = selinuxDir + "config" + selinuxTypeTag = "SELINUXTYPE" + selinuxTag = "SELINUX" + selinuxPath = "/sys/fs/selinux" + xattrNameSelinux = "security.selinux" + stRdOnly = 0x01 +) + +type selinuxState struct { + enabledSet bool + enabled bool + selinuxfsSet bool + selinuxfs string + mcsList map[string]bool + sync.Mutex +} + +var ( + assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`) + state = selinuxState{ + mcsList: make(map[string]bool), + } +) + +// Context is a representation of the SELinux label broken into 4 parts +type Context map[string]string + +func (s *selinuxState) setEnable(enabled bool) bool { + s.Lock() + defer s.Unlock() + s.enabledSet = true + s.enabled = enabled + return s.enabled +} + +func (s *selinuxState) getEnabled() bool { + s.Lock() + enabled := s.enabled + enabledSet := s.enabledSet + s.Unlock() + if enabledSet { + return enabled + } + + enabled = false + if fs := getSelinuxMountPoint(); fs != "" { + if con, _ := CurrentLabel(); con != "kernel" { + enabled = true + } + } + return s.setEnable(enabled) +} + +// SetDisabled disables selinux support for the package +func SetDisabled() { + state.setEnable(false) +} + +func (s *selinuxState) setSELinuxfs(selinuxfs string) string { + s.Lock() + defer s.Unlock() + s.selinuxfsSet = true + s.selinuxfs = selinuxfs + return s.selinuxfs +} + +func (s *selinuxState) getSELinuxfs() string { + s.Lock() + selinuxfs := s.selinuxfs + selinuxfsSet := s.selinuxfsSet + s.Unlock() + if selinuxfsSet { + return selinuxfs + } + + selinuxfs = "" + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return selinuxfs + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + // Safe as mountinfo encodes mountpoints with spaces as \040. + sepIdx := strings.Index(txt, " - ") + if sepIdx == -1 { + continue + } + if !strings.Contains(txt[sepIdx:], "selinuxfs") { + continue + } + fields := strings.Split(txt, " ") + if len(fields) < 5 { + continue + } + selinuxfs = fields[4] + break + } + + if selinuxfs != "" { + var buf syscall.Statfs_t + syscall.Statfs(selinuxfs, &buf) + if (buf.Flags & stRdOnly) == 1 { + selinuxfs = "" + } + } + return s.setSELinuxfs(selinuxfs) +} + +// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs +// filesystem or an empty string if no mountpoint is found. Selinuxfs is +// a proc-like pseudo-filesystem that exposes the selinux policy API to +// processes. The existence of an selinuxfs mount is used to determine +// whether selinux is currently enabled or not. +func getSelinuxMountPoint() string { + return state.getSELinuxfs() +} + +// GetEnabled returns whether selinux is currently enabled. +func GetEnabled() bool { + return state.getEnabled() +} + +func readConfig(target string) (value string) { + var ( + val, key string + bufin *bufio.Reader + ) + + in, err := os.Open(selinuxConfig) + if err != nil { + return "" + } + defer in.Close() + + bufin = bufio.NewReader(in) + + for done := false; !done; { + var line string + if line, err = bufin.ReadString('\n'); err != nil { + if err != io.EOF { + return "" + } + done = true + } + line = strings.TrimSpace(line) + if len(line) == 0 { + // Skip blank lines + continue + } + if line[0] == ';' || line[0] == '#' { + // Skip comments + continue + } + if groups := assignRegex.FindStringSubmatch(line); groups != nil { + key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) + if key == target { + return strings.Trim(val, "\"") + } + } + } + return "" +} + +func getSELinuxPolicyRoot() string { + return selinuxDir + readConfig(selinuxTypeTag) +} + +func readCon(name string) (string, error) { + var val string + + in, err := os.Open(name) + if err != nil { + return "", err + } + defer in.Close() + + _, err = fmt.Fscanf(in, "%s", &val) + return val, err +} + +// SetFileLabel sets the SELinux label for this path or returns an error. +func SetFileLabel(path string, label string) error { + return lsetxattr(path, xattrNameSelinux, []byte(label), 0) +} + +// FileLabel returns the SELinux label for this path or returns an error. +func FileLabel(path string) (string, error) { + label, err := lgetxattr(path, xattrNameSelinux) + if err != nil { + return "", err + } + // Trim the NUL byte at the end of the byte buffer, if present. + if len(label) > 0 && label[len(label)-1] == '\x00' { + label = label[:len(label)-1] + } + return string(label), nil +} + +/* +SetFSCreateLabel tells kernel the label to create all file system objects +created by this task. Setting label="" to return to default. +*/ +func SetFSCreateLabel(label string) error { + return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), label) +} + +/* +FSCreateLabel returns the default label the kernel which the kernel is using +for file system objects created by this task. "" indicates default. +*/ +func FSCreateLabel() (string, error) { + return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid())) +} + +// CurrentLabel returns the SELinux label of the current process thread, or an error. +func CurrentLabel() (string, error) { + return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid())) +} + +// PidLabel returns the SELinux label of the given pid, or an error. +func PidLabel(pid int) (string, error) { + return readCon(fmt.Sprintf("/proc/%d/attr/current", pid)) +} + +/* +ExecLabel returns the SELinux label that the kernel will use for any programs +that are executed by the current process thread, or an error. +*/ +func ExecLabel() (string, error) { + return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid())) +} + +func writeCon(name string, val string) error { + out, err := os.OpenFile(name, os.O_WRONLY, 0) + if err != nil { + return err + } + defer out.Close() + + if val != "" { + _, err = out.Write([]byte(val)) + } else { + _, err = out.Write(nil) + } + return err +} + +/* +SetExecLabel sets the SELinux label that the kernel will use for any programs +that are executed by the current process thread, or an error. +*/ +func SetExecLabel(label string) error { + return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), label) +} + +// Get returns the Context as a string +func (c Context) Get() string { + return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"]) +} + +// NewContext creates a new Context struct from the specified label +func NewContext(label string) Context { + c := make(Context) + + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + c["user"] = con[0] + c["role"] = con[1] + c["type"] = con[2] + c["level"] = con[3] + } + return c +} + +// ReserveLabel reserves the MLS/MCS level component of the specified label +func ReserveLabel(label string) { + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + mcsAdd(con[3]) + } +} + +func selinuxEnforcePath() string { + return fmt.Sprintf("%s/enforce", selinuxPath) +} + +// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled +func EnforceMode() int { + var enforce int + + enforceS, err := readCon(selinuxEnforcePath()) + if err != nil { + return -1 + } + + enforce, err = strconv.Atoi(string(enforceS)) + if err != nil { + return -1 + } + return enforce +} + +/* +SetEnforceMode sets the current SELinux mode Enforcing, Permissive. +Disabled is not valid, since this needs to be set at boot time. +*/ +func SetEnforceMode(mode int) error { + return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode)) +} + +/* +DefaultEnforceMode returns the systems default SELinux mode Enforcing, +Permissive or Disabled. Note this is is just the default at boot time. +EnforceMode tells you the systems current mode. +*/ +func DefaultEnforceMode() int { + switch readConfig(selinuxTag) { + case "enforcing": + return Enforcing + case "permissive": + return Permissive + } + return Disabled +} + +func mcsAdd(mcs string) error { + state.Lock() + defer state.Unlock() + if state.mcsList[mcs] { + return fmt.Errorf("MCS Label already exists") + } + state.mcsList[mcs] = true + return nil +} + +func mcsDelete(mcs string) { + state.Lock() + defer state.Unlock() + state.mcsList[mcs] = false +} + +func intToMcs(id int, catRange uint32) string { + var ( + SETSIZE = int(catRange) + TIER = SETSIZE + ORD = id + ) + + if id < 1 || id > 523776 { + return "" + } + + for ORD > TIER { + ORD = ORD - TIER + TIER-- + } + TIER = SETSIZE - TIER + ORD = ORD + TIER + return fmt.Sprintf("s0:c%d,c%d", TIER, ORD) +} + +func uniqMcs(catRange uint32) string { + var ( + n uint32 + c1, c2 uint32 + mcs string + ) + + for { + binary.Read(rand.Reader, binary.LittleEndian, &n) + c1 = n % catRange + binary.Read(rand.Reader, binary.LittleEndian, &n) + c2 = n % catRange + if c1 == c2 { + continue + } else { + if c1 > c2 { + c1, c2 = c2, c1 + } + } + mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2) + if err := mcsAdd(mcs); err != nil { + continue + } + break + } + return mcs +} + +/* +ReleaseLabel will unreserve the MLS/MCS Level field of the specified label. +Allowing it to be used by another process. +*/ +func ReleaseLabel(label string) { + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + mcsDelete(con[3]) + } +} + +var roFileLabel string + +// ROFileLabel returns the specified SELinux readonly file label +func ROFileLabel() (fileLabel string) { + return roFileLabel +} + +/* +ContainerLabels returns an allocated processLabel and fileLabel to be used for +container labeling by the calling process. +*/ +func ContainerLabels() (processLabel string, fileLabel string) { + var ( + val, key string + bufin *bufio.Reader + ) + + if !GetEnabled() { + return "", "" + } + lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot()) + in, err := os.Open(lxcPath) + if err != nil { + return "", "" + } + defer in.Close() + + bufin = bufio.NewReader(in) + + for done := false; !done; { + var line string + if line, err = bufin.ReadString('\n'); err != nil { + if err == io.EOF { + done = true + } else { + goto exit + } + } + line = strings.TrimSpace(line) + if len(line) == 0 { + // Skip blank lines + continue + } + if line[0] == ';' || line[0] == '#' { + // Skip comments + continue + } + if groups := assignRegex.FindStringSubmatch(line); groups != nil { + key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) + if key == "process" { + processLabel = strings.Trim(val, "\"") + } + if key == "file" { + fileLabel = strings.Trim(val, "\"") + } + if key == "ro_file" { + roFileLabel = strings.Trim(val, "\"") + } + } + } + + if processLabel == "" || fileLabel == "" { + return "", "" + } + + if roFileLabel == "" { + roFileLabel = fileLabel + } +exit: + mcs := uniqMcs(1024) + scon := NewContext(processLabel) + scon["level"] = mcs + processLabel = scon.Get() + scon = NewContext(fileLabel) + scon["level"] = mcs + fileLabel = scon.Get() + return processLabel, fileLabel +} + +// SecurityCheckContext validates that the SELinux label is understood by the kernel +func SecurityCheckContext(val string) error { + return writeCon(fmt.Sprintf("%s.context", selinuxPath), val) +} + +/* +CopyLevel returns a label with the MLS/MCS level from src label replaces on +the dest label. +*/ +func CopyLevel(src, dest string) (string, error) { + if src == "" { + return "", nil + } + if err := SecurityCheckContext(src); err != nil { + return "", err + } + if err := SecurityCheckContext(dest); err != nil { + return "", err + } + scon := NewContext(src) + tcon := NewContext(dest) + mcsDelete(tcon["level"]) + mcsAdd(scon["level"]) + tcon["level"] = scon["level"] + return tcon.Get(), nil +} + +// Prevent users from relabing system files +func badPrefix(fpath string) error { + var badprefixes = []string{"/usr"} + + for _, prefix := range badprefixes { + if fpath == prefix || strings.HasPrefix(fpath, fmt.Sprintf("%s/", prefix)) { + return fmt.Errorf("relabeling content in %s is not allowed", prefix) + } + } + return nil +} + +// Chcon changes the fpath file object to the SELinux label label. +// If the fpath is a directory and recurse is true Chcon will walk the +// directory tree setting the label +func Chcon(fpath string, label string, recurse bool) error { + if label == "" { + return nil + } + if err := badPrefix(fpath); err != nil { + return err + } + callback := func(p string, info os.FileInfo, err error) error { + return SetFileLabel(p, label) + } + + if recurse { + return filepath.Walk(fpath, callback) + } + + return SetFileLabel(fpath, label) +} + +// DupSecOpt takes an SELinux process label and returns security options that +// can will set the SELinux Type and Level for future container processes +func DupSecOpt(src string) []string { + if src == "" { + return nil + } + con := NewContext(src) + if con["user"] == "" || + con["role"] == "" || + con["type"] == "" || + con["level"] == "" { + return nil + } + return []string{"user:" + con["user"], + "role:" + con["role"], + "type:" + con["type"], + "level:" + con["level"]} +} + +// DisableSecOpt returns a security opt that can be used to disabling SELinux +// labeling support for future container processes +func DisableSecOpt() []string { + return []string{"disable"} +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go new file mode 100644 index 000000000..7f2ef8504 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go @@ -0,0 +1,78 @@ +// +build linux + +package selinux + +import ( + "syscall" + "unsafe" +) + +var _zero uintptr + +// Returns a []byte slice if the xattr is set and nil otherwise +// Requires path and its attribute as arguments +func lgetxattr(path string, attr string) ([]byte, error) { + var sz int + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return nil, err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return nil, err + } + + // Start with a 128 length byte array + sz = 128 + dest := make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + _sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + + switch { + case errno == syscall.ENODATA: + return nil, errno + case errno == syscall.ENOTSUP: + return nil, errno + case errno == syscall.ERANGE: + // 128 byte array might just not be good enough, + // A dummy buffer is used ``uintptr(0)`` to get real size + // of the xattrs on disk + _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0) + sz = int(_sz) + if sz < 0 { + return nil, errno + } + dest = make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + if errno != 0 { + return nil, errno + } + case errno != 0: + return nil, errno + } + sz = int(_sz) + return dest[:sz], nil +} + +func lsetxattr(path string, attr string, data []byte, flags int) error { + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return err + } + var dataBytes unsafe.Pointer + if len(data) > 0 { + dataBytes = unsafe.Pointer(&data[0]) + } else { + dataBytes = unsafe.Pointer(&_zero) + } + _, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0) + if errno != 0 { + return errno + } + return nil +} |