summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.papr.yml4
-rw-r--r--Dockerfile13
-rw-r--r--Makefile11
-rw-r--r--README.md1
-rw-r--r--conmon/Makefile13
-rw-r--r--conmon/cmsg.c149
-rw-r--r--conmon/cmsg.h38
-rw-r--r--conmon/conmon.c1465
-rw-r--r--install.md6
-rw-r--r--test/helpers.bash8
10 files changed, 30 insertions, 1678 deletions
diff --git a/.papr.yml b/.papr.yml
index b82f7df61..993407e9d 100644
--- a/.papr.yml
+++ b/.papr.yml
@@ -15,6 +15,7 @@ tests:
- CRIO_ROOT=/var/tmp/checkout PODMAN_BINARY=/usr/bin/podman CONMON_BINARY=/usr/libexec/crio/conmon PAPR=1 sh .papr.sh
packages:
+ - cri-o
- containernetworking-cni
extra-repos:
@@ -32,5 +33,8 @@ extra-repos:
- name: epel
metalink: https://mirrors.fedoraproject.org/metalink?repo=epel-7&arch=$basearch
gpgcheck: 0
+ - name: cri-o
+ baseurl: https://cbs.centos.org/repos/virt7-container-common-candidate/$basearch/os
+ gpgcheck: 0
context: centos/7/atomic/smoketested
diff --git a/Dockerfile b/Dockerfile
index b562ddd1f..c046c6ad3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -67,6 +67,19 @@ RUN set -x \
&& cp runc /usr/bin/runc \
&& rm -rf "$GOPATH"
+# Install conmon
+ENV CRIO_COMMIT 814c6ab0913d827543696b366048056a31d9529c
+RUN set -x \
+ && export GOPATH="$(mktemp -d)" \
+ && git clone https://github.com/kubernetes-incubator/cri-o.git "$GOPATH/src/github.com/kubernetes-incubator/cri-o.git" \
+ && cd "$GOPATH/src/github.com/kubernetes-incubator/cri-o.git" \
+ && git fetch origin --tags \
+ && git checkout -q "$CRIO_COMMIT" \
+ && mkdir bin \
+ && make conmon \
+ && install -D -m 755 bin/conmon /usr/libexec/crio/conmon \
+ && rm -rf "$GOPATH"
+
# Install CNI plugins
ENV CNI_COMMIT 7480240de9749f9a0a5c8614b17f1f03e0c06ab9
RUN set -x \
diff --git a/Makefile b/Makefile
index cee8257d6..810492a5a 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ help:
@echo "Usage: make <target>"
@echo
@echo " * 'install' - Install binaries to system locations"
- @echo " * 'binaries' - Build conmon and podman"
+ @echo " * 'binaries' - Build podman"
@echo " * 'integration' - Execute integration tests"
@echo " * 'clean' - Clean artifacts"
@echo " * 'lint' - Execute the source code linter"
@@ -75,9 +75,6 @@ gofmt:
fix_gofmt:
@./hack/verify-gofmt.sh -f
-conmon:
- $(MAKE) -C $@
-
test/bin2img/bin2img: .gopathok $(wildcard test/bin2img/*.go)
$(GO) build $(LDFLAGS) -tags "$(BUILDTAGS) containers_image_ostree_stub" -o $@ $(PROJECT)/test/bin2img
@@ -100,7 +97,6 @@ endif
find . -name \*~ -delete
find . -name \#\* -delete
rm -f bin/podman
- make -C conmon clean
rm -f test/bin2img/bin2img
rm -f test/copyimg/copyimg
rm -f test/checkseccomp/checkseccomp
@@ -123,7 +119,7 @@ localintegration: test-binaries
vagrant-check:
BOX=$(BOX) sh ./vagrant.sh
-binaries: conmon podman
+binaries: podman
test-binaries: test/bin2img/bin2img test/copyimg/copyimg test/checkseccomp/checkseccomp
@@ -142,7 +138,6 @@ install: .gopathok install.bin install.man install.cni
install.bin:
install ${SELINUXOPT} -D -m 755 bin/podman $(BINDIR)/podman
- install ${SELINUXOPT} -D -m 755 bin/conmon $(LIBEXECDIR)/crio/conmon
install.man: docs
install ${SELINUXOPT} -d -m 755 $(MANDIR)/man1
@@ -165,7 +160,6 @@ install.docker: docker-docs
install ${SELINUXOPT} -m 644 docs/docker*.1 -t $(MANDIR)/man1
uninstall:
- rm -f $(LIBEXECDIR)/crio/conmon
for i in $(filter %.1,$(MANPAGES)); do \
rm -f $(MANDIR)/man1/$$(basename $${i}); \
done
@@ -208,7 +202,6 @@ install.tools: .install.gitvalidation .install.gometalinter .install.md2man
.PHONY: \
binaries \
clean \
- conmon \
default \
docs \
gofmt \
diff --git a/README.md b/README.md
index 54d1aeec4..abdb854b5 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ The plan is to use OCI projects and best of breed libraries for different aspect
- Images: Image management using [containers/image](https://github.com/containers/image)
- Storage: Storage and management of image layers using [containers/storage](https://github.com/containers/storage)
- Networking: Networking support through use of [CNI](https://github.com/containernetworking/cni)
+- Conmon: Monitoring of OCI runtimes using [conmon](https://github.com/kubernetes-incubator/cri-o), which is part of the CRI-O package
## Podman Information for Developers
diff --git a/conmon/Makefile b/conmon/Makefile
deleted file mode 100644
index 67c067be3..000000000
--- a/conmon/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-src = $(wildcard *.c)
-obj = $(src:.c=.o)
-
-override LIBS += $(shell pkg-config --libs glib-2.0)
-override CFLAGS += -std=c99 -Os -Wall -Wextra $(shell pkg-config --cflags glib-2.0)
-
-conmon: $(obj)
- mkdir -p ../bin/
- $(CC) -o ../bin/$@ $^ $(CFLAGS) $(LIBS)
-
-.PHONY: clean
-clean:
- rm -f $(obj) ../bin/conmon
diff --git a/conmon/cmsg.c b/conmon/cmsg.c
deleted file mode 100644
index c44db2ef1..000000000
--- a/conmon/cmsg.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright 2016 SUSE LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* NOTE: This code comes directly from runc/libcontainer/utils/cmsg.c. */
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "cmsg.h"
-
-#define error(fmt, ...) \
- ({ \
- fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
- errno = ECOMM; \
- goto err; /* return value */ \
- })
-
-/*
- * Sends a file descriptor along the sockfd provided. Returns the return
- * value of sendmsg(2). Any synchronisation and preparation of state
- * should be done external to this (we expect the other side to be in
- * recvfd() in the code).
- */
-ssize_t sendfd(int sockfd, struct file_t file)
-{
- struct msghdr msg = {0};
- struct iovec iov[1] = {0};
- struct cmsghdr *cmsg;
- int *fdptr;
-
- union {
- char buf[CMSG_SPACE(sizeof(file.fd))];
- struct cmsghdr align;
- } u;
-
- /*
- * We need to send some other data along with the ancillary data,
- * otherwise the other side won't recieve any data. This is very
- * well-hidden in the documentation (and only applies to
- * SOCK_STREAM). See the bottom part of unix(7).
- */
- iov[0].iov_base = file.name;
- iov[0].iov_len = strlen(file.name) + 1;
-
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
- msg.msg_control = u.buf;
- msg.msg_controllen = sizeof(u.buf);
-
- cmsg = CMSG_FIRSTHDR(&msg);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- cmsg->cmsg_len = CMSG_LEN(sizeof(int));
-
- fdptr = (int *) CMSG_DATA(cmsg);
- memcpy(fdptr, &file.fd, sizeof(int));
-
- return sendmsg(sockfd, &msg, 0);
-}
-
-/*
- * Receives a file descriptor from the sockfd provided. Returns the file
- * descriptor as sent from sendfd(). It will return the file descriptor
- * or die (literally) trying. Any synchronisation and preparation of
- * state should be done external to this (we expect the other side to be
- * in sendfd() in the code).
- */
-struct file_t recvfd(int sockfd)
-{
- struct msghdr msg = {0};
- struct iovec iov[1] = {0};
- struct cmsghdr *cmsg;
- struct file_t file = {0};
- int *fdptr;
- int olderrno;
-
- union {
- char buf[CMSG_SPACE(sizeof(file.fd))];
- struct cmsghdr align;
- } u;
-
- /* Allocate a buffer. */
- /* TODO: Make this dynamic with MSG_PEEK. */
- file.name = malloc(TAG_BUFFER);
- if (!file.name)
- error("recvfd: failed to allocate file.tag buffer\n");
-
- /*
- * We need to "recieve" the non-ancillary data even though we don't
- * plan to use it at all. Otherwise, things won't work as expected.
- * See unix(7) and other well-hidden documentation.
- */
- iov[0].iov_base = file.name;
- iov[0].iov_len = TAG_BUFFER;
-
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
- msg.msg_control = u.buf;
- msg.msg_controllen = sizeof(u.buf);
-
- ssize_t ret = recvmsg(sockfd, &msg, 0);
- if (ret < 0)
- goto err;
-
- cmsg = CMSG_FIRSTHDR(&msg);
- if (!cmsg)
- error("recvfd: got NULL from CMSG_FIRSTHDR");
- if (cmsg->cmsg_level != SOL_SOCKET)
- error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
- if (cmsg->cmsg_type != SCM_RIGHTS)
- error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
- error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len);
-
- fdptr = (int *) CMSG_DATA(cmsg);
- if (!fdptr || *fdptr < 0)
- error("recvfd: recieved invalid pointer");
-
- file.fd = *fdptr;
- return file;
-
-err:
- olderrno = errno;
- free(file.name);
- errno = olderrno;
- return (struct file_t){0};
-}
diff --git a/conmon/cmsg.h b/conmon/cmsg.h
deleted file mode 100644
index 7c7aefe6e..000000000
--- a/conmon/cmsg.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2016 SUSE LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* NOTE: This code comes directly from runc/libcontainer/utils/cmsg.h. */
-
-#pragma once
-
-#if !defined(CMSG_H)
-#define CMSG_H
-
-#include <sys/types.h>
-
-/* TODO: Implement this properly with MSG_PEEK. */
-#define TAG_BUFFER 4096
-
-/* This mirrors Go's (*os.File). */
-struct file_t {
- char *name;
- int fd;
-};
-
-struct file_t recvfd(int sockfd);
-ssize_t sendfd(int sockfd, struct file_t file);
-
-#endif /* !defined(CMSG_H) */
diff --git a/conmon/conmon.c b/conmon/conmon.c
deleted file mode 100644
index b00cb0cd5..000000000
--- a/conmon/conmon.c
+++ /dev/null
@@ -1,1465 +0,0 @@
-#define _GNU_SOURCE
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <sys/prctl.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/un.h>
-#include <sys/stat.h>
-#include <sys/wait.h>
-#include <sys/eventfd.h>
-#include <sys/stat.h>
-#include <sys/uio.h>
-#include <sys/ioctl.h>
-#include <termios.h>
-#include <syslog.h>
-#include <unistd.h>
-#include <inttypes.h>
-
-#include <glib.h>
-#include <glib-unix.h>
-
-#include "cmsg.h"
-
-#define pexit(fmt, ...) \
- do { \
- fprintf(stderr, "[conmon:e]: " fmt " %m\n", ##__VA_ARGS__); \
- syslog(LOG_ERR, "conmon <error>: " fmt ": %m\n", ##__VA_ARGS__); \
- exit(EXIT_FAILURE); \
- } while (0)
-
-#define nexit(fmt, ...) \
- do { \
- fprintf(stderr, "[conmon:e]: " fmt "\n", ##__VA_ARGS__); \
- syslog(LOG_ERR, "conmon <error>: " fmt " \n", ##__VA_ARGS__); \
- exit(EXIT_FAILURE); \
- } while (0)
-
-#define nwarn(fmt, ...) \
- do { \
- fprintf(stderr, "[conmon:w]: " fmt "\n", ##__VA_ARGS__); \
- syslog(LOG_INFO, "conmon <nwarn>: " fmt " \n", ##__VA_ARGS__); \
- } while (0)
-
-#define ninfo(fmt, ...) \
- do { \
- fprintf(stderr, "[conmon:i]: " fmt "\n", ##__VA_ARGS__); \
- syslog(LOG_INFO, "conmon <ninfo>: " fmt " \n", ##__VA_ARGS__); \
- } while (0)
-
-#define _cleanup_(x) __attribute__((cleanup(x)))
-
-static inline void freep(void *p)
-{
- free(*(void **)p);
-}
-
-static inline void closep(int *fd)
-{
- if (*fd >= 0)
- close(*fd);
- *fd = -1;
-}
-
-static inline void fclosep(FILE **fp) {
- if (*fp)
- fclose(*fp);
- *fp = NULL;
-}
-
-static inline void gstring_free_cleanup(GString **string)
-{
- if (*string)
- g_string_free(*string, TRUE);
-}
-
-static inline void strv_cleanup(char ***strv)
-{
- if (strv)
- g_strfreev (*strv);
-}
-
-#define _cleanup_free_ _cleanup_(freep)
-#define _cleanup_close_ _cleanup_(closep)
-#define _cleanup_fclose_ _cleanup_(fclosep)
-#define _cleanup_gstring_ _cleanup_(gstring_free_cleanup)
-#define _cleanup_strv_ _cleanup_(strv_cleanup)
-
-#define BUF_SIZE 8192
-#define CMD_SIZE 1024
-#define MAX_EVENTS 10
-
-#define DEFAULT_SOCKET_PATH "/var/lib/crio"
-
-static bool opt_terminal = false;
-static bool opt_stdin = false;
-static char *opt_cid = NULL;
-static char *opt_cuuid = NULL;
-static char *opt_runtime_path = NULL;
-static char *opt_bundle_path = NULL;
-static char *opt_pid_file = NULL;
-static bool opt_systemd_cgroup = false;
-static bool opt_no_pivot = false;
-static char *opt_exec_process_spec = NULL;
-static bool opt_exec = false;
-static char *opt_log_path = NULL;
-static char *opt_exit_dir = NULL;
-static int opt_timeout = 0;
-static int64_t opt_log_size_max = -1;
-static char *opt_socket_path = DEFAULT_SOCKET_PATH;
-static GOptionEntry opt_entries[] =
-{
- { "terminal", 't', 0, G_OPTION_ARG_NONE, &opt_terminal, "Terminal", NULL },
- { "stdin", 'i', 0, G_OPTION_ARG_NONE, &opt_stdin, "Stdin", NULL },
- { "cid", 'c', 0, G_OPTION_ARG_STRING, &opt_cid, "Container ID", NULL },
- { "cuuid", 'u', 0, G_OPTION_ARG_STRING, &opt_cuuid, "Container UUID", NULL },
- { "runtime", 'r', 0, G_OPTION_ARG_STRING, &opt_runtime_path, "Runtime path", NULL },
- { "no-pivot", 0, 0, G_OPTION_ARG_NONE, &opt_no_pivot, "do not use pivot_root", NULL },
- { "bundle", 'b', 0, G_OPTION_ARG_STRING, &opt_bundle_path, "Bundle path", NULL },
- { "pidfile", 'p', 0, G_OPTION_ARG_STRING, &opt_pid_file, "PID file", NULL },
- { "systemd-cgroup", 's', 0, G_OPTION_ARG_NONE, &opt_systemd_cgroup, "Enable systemd cgroup manager", NULL },
- { "exec", 'e', 0, G_OPTION_ARG_NONE, &opt_exec, "Exec a command in a running container", NULL },
- { "exec-process-spec", 0, 0, G_OPTION_ARG_STRING, &opt_exec_process_spec, "Path to the process spec for exec", NULL },
- { "exit-dir", 0, 0, G_OPTION_ARG_STRING, &opt_exit_dir, "Path to the directory where exit files are written", NULL },
- { "log-path", 'l', 0, G_OPTION_ARG_STRING, &opt_log_path, "Log file path", NULL },
- { "timeout", 'T', 0, G_OPTION_ARG_INT, &opt_timeout, "Timeout in seconds", NULL },
- { "log-size-max", 0, 0, G_OPTION_ARG_INT64, &opt_log_size_max, "Maximum size of log file", NULL },
- { "socket-dir-path", 0, 0, G_OPTION_ARG_STRING, &opt_socket_path, "Location of container attach sockets", NULL },
- { NULL }
-};
-
-/* strlen("1997-03-25T13:20:42.999999999+01:00 stdout ") + 1 */
-#define TSBUFLEN 44
-
-#define CGROUP_ROOT "/sys/fs/cgroup"
-
-static int log_fd = -1;
-
-static ssize_t write_all(int fd, const void *buf, size_t count)
-{
- size_t remaining = count;
- const char *p = buf;
- ssize_t res;
-
- while (remaining > 0) {
- do {
- res = write(fd, p, remaining);
- } while (res == -1 && errno == EINTR);
-
- if (res <= 0)
- return -1;
-
- remaining -= res;
- p += res;
- }
-
- return count;
-}
-
-#define WRITEV_BUFFER_N_IOV 128
-
-typedef struct {
- int iovcnt;
- struct iovec iov[WRITEV_BUFFER_N_IOV];
-} writev_buffer_t;
-
-static ssize_t writev_buffer_flush (int fd, writev_buffer_t *buf)
-{
- size_t count = 0;
- ssize_t res;
- struct iovec *iov;
- int iovcnt;
-
- iovcnt = buf->iovcnt;
- iov = buf->iov;
-
- while (iovcnt > 0) {
- do {
- res = writev(fd, iov, iovcnt);
- } while (res == -1 && errno == EINTR);
-
- if (res <= 0)
- return -1;
-
- count += res;
-
- while (res > 0) {
- size_t from_this = MIN((size_t)res, iov->iov_len);
- iov->iov_len -= from_this;
- res -= from_this;
-
- if (iov->iov_len == 0) {
- iov++;
- iovcnt--;
- }
- }
- }
-
- buf->iovcnt = 0;
-
- return count;
-}
-
-ssize_t writev_buffer_append_segment(int fd, writev_buffer_t *buf, const void *data, ssize_t len)
-{
- if (data == NULL)
- return 1;
-
- if (len < 0)
- len = strlen ((char *)data);
-
- if (buf->iovcnt == WRITEV_BUFFER_N_IOV &&
- writev_buffer_flush (fd, buf) < 0)
- return -1;
-
- if (len > 0) {
- buf->iov[buf->iovcnt].iov_base = (void *)data;
- buf->iov[buf->iovcnt].iov_len = (size_t)len;
- buf->iovcnt++;
- }
-
- return 1;
-}
-
-int set_k8s_timestamp(char *buf, ssize_t buflen, const char *pipename)
-{
- struct tm *tm;
- struct timespec ts;
- char off_sign = '+';
- int off, len, err = -1;
-
- if (clock_gettime(CLOCK_REALTIME, &ts) < 0) {
- /* If CLOCK_REALTIME is not supported, we set nano seconds to 0 */
- if (errno == EINVAL) {
- ts.tv_nsec = 0;
- } else {
- return err;
- }
- }
-
- if ((tm = localtime(&ts.tv_sec)) == NULL)
- return err;
-
-
- off = (int) tm->tm_gmtoff;
- if (tm->tm_gmtoff < 0) {
- off_sign = '-';
- off = -off;
- }
-
- len = snprintf(buf, buflen, "%d-%02d-%02dT%02d:%02d:%02d.%09ld%c%02d:%02d %s ",
- tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
- tm->tm_hour, tm->tm_min, tm->tm_sec, ts.tv_nsec,
- off_sign, off / 3600, off % 3600, pipename);
-
- if (len < buflen)
- err = 0;
- return err;
-}
-
-/* stdpipe_t represents one of the std pipes (or NONE).
- * Sync with const in container_attach.go */
-typedef enum {
- NO_PIPE,
- STDIN_PIPE, /* unused */
- STDOUT_PIPE,
- STDERR_PIPE,
-} stdpipe_t;
-
-const char *stdpipe_name(stdpipe_t pipe)
-{
- switch (pipe) {
- case STDIN_PIPE:
- return "stdin";
- case STDOUT_PIPE:
- return "stdout";
- case STDERR_PIPE:
- return "stderr";
- default:
- return "NONE";
- }
-}
-
-/*
- * The CRI requires us to write logs with a (timestamp, stream, line) format
- * for every newline-separated line. write_k8s_log writes said format for every
- * line in buf, and will partially write the final line of the log if buf is
- * not terminated by a newline.
- */
-static int write_k8s_log(int fd, stdpipe_t pipe, const char *buf, ssize_t buflen)
-{
- char tsbuf[TSBUFLEN];
- writev_buffer_t bufv = {0};
- static int64_t bytes_written = 0;
- int64_t bytes_to_be_written = 0;
-
- /*
- * Use the same timestamp for every line of the log in this buffer.
- * There is no practical difference in the output since write(2) is
- * fast.
- */
- if (set_k8s_timestamp(tsbuf, sizeof tsbuf, stdpipe_name(pipe)))
- /* TODO: We should handle failures much more cleanly than this. */
- return -1;
-
- while (buflen > 0) {
- const char *line_end = NULL;
- ptrdiff_t line_len = 0;
- bool partial = FALSE;
-
- /* Find the end of the line, or alternatively the end of the buffer. */
- line_end = memchr(buf, '\n', buflen);
- if (line_end == NULL) {
- line_end = &buf[buflen-1];
- partial = TRUE;
- }
- line_len = line_end - buf + 1;
-
- /* This is line_len bytes + TSBUFLEN - 1 + 2 (- 1 is for ignoring \0). */
- bytes_to_be_written = line_len + TSBUFLEN + 1;
-
- /* If partial, then we add a \n */
- if (partial) {
- bytes_to_be_written += 1;
- }
-
- /*
- * We re-open the log file if writing out the bytes will exceed the max
- * log size. We also reset the state so that the new file is started with
- * a timestamp.
- */
- if ((opt_log_size_max > 0) && (bytes_written + bytes_to_be_written) > opt_log_size_max) {
- ninfo("Creating new log file");
- bytes_written = 0;
-
- /* Close the existing fd */
- close(fd);
-
- /* Unlink the file */
- if (unlink(opt_log_path) < 0) {
- pexit("Failed to unlink log file");
- }
-
- /* Open the log path file again */
- log_fd = open(opt_log_path, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, 0600);
- if (log_fd < 0)
- pexit("Failed to open log file");
- fd = log_fd;
- }
-
- /* Output the timestamp */
- if (writev_buffer_append_segment(fd, &bufv, tsbuf, -1) < 0) {
- nwarn("failed to write (timestamp, stream) to log");
- goto next;
- }
-
- /* Output log tag for partial or newline */
- if (partial) {
- if (writev_buffer_append_segment(fd, &bufv, "P ", -1) < 0) {
- nwarn("failed to write partial log tag");
- goto next;
- }
- } else {
- if (writev_buffer_append_segment(fd, &bufv, "F ", -1) < 0) {
- nwarn("failed to write end log tag");
- goto next;
- }
- }
-
- /* Output the actual contents. */
- if (writev_buffer_append_segment(fd, &bufv, buf, line_len) < 0) {
- nwarn("failed to write buffer to log");
- goto next;
- }
-
- /* Output a newline for partial */
- if (partial) {
- if (writev_buffer_append_segment(fd, &bufv, "\n", -1) < 0) {
- nwarn("failed to write newline to log");
- goto next;
- }
- }
-
- bytes_written += bytes_to_be_written;
-next:
- /* Update the head of the buffer remaining to output. */
- buf += line_len;
- buflen -= line_len;
- }
-
- if (writev_buffer_flush (fd, &bufv) < 0) {
- nwarn("failed to flush buffer to log");
- }
-
- ninfo("Total bytes written: %"PRId64"", bytes_written);
-
- return 0;
-}
-
-/*
- * Returns the path for specified controller name for a pid.
- * Returns NULL on error.
- */
-static char *process_cgroup_subsystem_path(int pid, const char *subsystem) {
- _cleanup_free_ char *cgroups_file_path = g_strdup_printf("/proc/%d/cgroup", pid);
- _cleanup_fclose_ FILE *fp = NULL;
- fp = fopen(cgroups_file_path, "re");
- if (fp == NULL) {
- nwarn("Failed to open cgroups file: %s", cgroups_file_path);
- return NULL;
- }
-
- _cleanup_free_ char *line = NULL;
- ssize_t read;
- size_t len = 0;
- char *ptr, *path;
- char *subsystem_path = NULL;
- int i;
- while ((read = getline(&line, &len, fp)) != -1) {
- _cleanup_strv_ char **subsystems = NULL;
- ptr = strchr(line, ':');
- if (ptr == NULL) {
- nwarn("Error parsing cgroup, ':' not found: %s", line);
- return NULL;
- }
- ptr++;
- path = strchr(ptr, ':');
- if (path == NULL) {
- nwarn("Error parsing cgroup, second ':' not found: %s", line);
- return NULL;
- }
- *path = 0;
- path++;
- subsystems = g_strsplit (ptr, ",", -1);
- for (i = 0; subsystems[i] != NULL; i++) {
- if (strcmp (subsystems[i], subsystem) == 0) {
- char *subpath = strchr(subsystems[i], '=');
- if (subpath == NULL) {
- subpath = ptr;
- } else {
- *subpath = 0;
- }
-
- subsystem_path = g_strdup_printf("%s/%s%s", CGROUP_ROOT, subpath, path);
- subsystem_path[strlen(subsystem_path) - 1] = '\0';
- return subsystem_path;
- }
- }
- }
-
- return NULL;
-}
-
-static char *escape_json_string(const char *str)
-{
- GString *escaped;
- const char *p;
-
- p = str;
- escaped = g_string_sized_new(strlen(str));
-
- while (*p != 0) {
- char c = *p++;
- if (c == '\\' || c == '"') {
- g_string_append_c(escaped, '\\');
- g_string_append_c(escaped, c);
- } else if (c == '\n') {
- g_string_append_printf (escaped, "\\n");
- } else if (c == '\t') {
- g_string_append_printf (escaped, "\\t");
- } else if ((c > 0 && c < 0x1f) || c == 0x7f) {
- g_string_append_printf (escaped, "\\u00%02x", (guint)c);
- } else {
- g_string_append_c (escaped, c);
- }
- }
-
- return g_string_free (escaped, FALSE);
-}
-
-static int get_pipe_fd_from_env(const char *envname)
-{
- char *pipe_str, *endptr;
- int pipe_fd;
-
- pipe_str = getenv(envname);
- if (pipe_str == NULL)
- return -1;
-
- errno = 0;
- pipe_fd = strtol(pipe_str, &endptr, 10);
- if (errno != 0 || *endptr != '\0')
- pexit("unable to parse %s", envname);
- if (fcntl(pipe_fd, F_SETFD, FD_CLOEXEC) == -1)
- pexit("unable to make %s CLOEXEC", envname);
-
- return pipe_fd;
-}
-
-static void add_argv(GPtrArray *argv_array, ...) G_GNUC_NULL_TERMINATED;
-
-static void add_argv(GPtrArray *argv_array, ...)
-{
- va_list args;
- char *arg;
-
- va_start (args, argv_array);
- while ((arg = va_arg (args, char *)))
- g_ptr_array_add (argv_array, arg);
- va_end (args);
-}
-
-static void end_argv(GPtrArray *argv_array)
-{
- g_ptr_array_add(argv_array, NULL);
-}
-
-/* Global state */
-
-static int runtime_status = -1;
-static int container_status = -1;
-
-static int masterfd_stdin = -1;
-static int masterfd_stdout = -1;
-static int masterfd_stderr = -1;
-
-/* Used for attach */
-static int conn_sock = -1;
-static int conn_sock_readable;
-static int conn_sock_writable;
-
-static int oom_event_fd = -1;
-static int attach_socket_fd = -1;
-static int console_socket_fd = -1;
-static int terminal_ctrl_fd = -1;
-
-static bool timed_out = FALSE;
-
-static GMainLoop *main_loop = NULL;
-
-static void conn_sock_shutdown(int how)
-{
- if (conn_sock == -1)
- return;
- shutdown(conn_sock, how);
- if (how & SHUT_RD)
- conn_sock_readable = false;
- if (how & SHUT_WR)
- conn_sock_writable = false;
- if (!conn_sock_writable && !conn_sock_readable) {
- close(conn_sock);
- conn_sock = -1;
- }
-}
-
-static gboolean stdio_cb(int fd, GIOCondition condition, gpointer user_data);
-
-static gboolean tty_hup_timeout_scheduled = false;
-
-static gboolean tty_hup_timeout_cb (G_GNUC_UNUSED gpointer user_data)
-{
- tty_hup_timeout_scheduled = false;
- g_unix_fd_add (masterfd_stdout, G_IO_IN, stdio_cb, GINT_TO_POINTER(STDOUT_PIPE));
- return G_SOURCE_REMOVE;
-}
-
-static bool read_stdio(int fd, stdpipe_t pipe, bool *eof)
-{
- #define STDIO_BUF_SIZE 8192 /* Sync with redirectResponseToOutputStreams() */
- /* We use one extra byte at the start, which we don't read into, instead
- we use that for marking the pipe when we write to the attached socket */
- char real_buf[STDIO_BUF_SIZE + 1];
- char *buf = real_buf + 1;
- ssize_t num_read = 0;
-
- if (eof)
- *eof = false;
-
- num_read = read(fd, buf, STDIO_BUF_SIZE);
- if (num_read == 0) {
- if (eof)
- *eof = true;
- return false;
- } else if (num_read < 0) {
- nwarn("stdio_input read failed %s", strerror(errno));
- return false;
- } else {
- if (write_k8s_log(log_fd, pipe, buf, num_read) < 0) {
- nwarn("write_k8s_log failed");
- return G_SOURCE_CONTINUE;
- }
-
- real_buf[0] = pipe;
- if (conn_sock_writable && write_all(conn_sock, real_buf, num_read+1) < 0) {
- nwarn("Failed to write to socket");
- conn_sock_shutdown(SHUT_WR);
- }
- return true;
- }
-}
-
-static void on_sigchld(G_GNUC_UNUSED int signal)
-{
- raise (SIGUSR1);
-}
-
-static void check_child_processes(GHashTable *pid_to_handler)
-{
- void (*cb) (GPid, int, gpointer);
-
- for (;;) {
- int status;
- pid_t pid = waitpid(-1, &status, WNOHANG);
-
- if (pid < 0 && errno == EINTR)
- continue;
- if (pid < 0 && errno == ECHILD) {
- g_main_loop_quit (main_loop);
- return;
- }
- if (pid < 0)
- pexit("Failed to read child process status");
-
- if (pid == 0)
- return;
-
- /* If we got here, pid > 0, so we have a valid pid to check. */
- cb = g_hash_table_lookup(pid_to_handler, &pid);
- if (cb)
- cb(pid, status, 0);
- }
-}
-
-static gboolean on_sigusr1_cb(gpointer user_data)
-{
- GHashTable *pid_to_handler = (GHashTable *) user_data;
- check_child_processes (pid_to_handler);
- return G_SOURCE_CONTINUE;
-}
-
-static gboolean stdio_cb(int fd, GIOCondition condition, gpointer user_data)
-{
- stdpipe_t pipe = GPOINTER_TO_INT(user_data);
- bool read_eof = false;
- bool has_input = (condition & G_IO_IN) != 0;
- bool has_hup = (condition & G_IO_HUP) != 0;
-
- /* When we get here, condition can be G_IO_IN and/or G_IO_HUP.
- IN means there is some data to read.
- HUP means the other side closed the fd. In the case of a pine
- this in final, and we will never get more data. However, in the
- terminal case this just means that nobody has the terminal
- open at this point, and this can be change whenever someone
- opens the tty */
-
- /* Read any data before handling hup */
- if (has_input) {
- read_stdio(fd, pipe, &read_eof);
- }
-
- if (has_hup && opt_terminal && pipe == STDOUT_PIPE) {
- /* We got a HUP from the terminal master this means there
- are no open slaves ptys atm, and we will get a lot
- of wakeups until we have one, switch to polling
- mode. */
-
- /* If we read some data this cycle, wait one more, maybe there
- is more in the buffer before we handle the hup */
- if (has_input && !read_eof) {
- return G_SOURCE_CONTINUE;
- }
-
- if (!tty_hup_timeout_scheduled) {
- g_timeout_add (100, tty_hup_timeout_cb, NULL);
- }
- tty_hup_timeout_scheduled = true;
- return G_SOURCE_REMOVE;
- }
-
- if (read_eof || (has_hup && !has_input)) {
- /* End of input */
- if (pipe == STDOUT_PIPE)
- masterfd_stdout = -1;
- if (pipe == STDERR_PIPE)
- masterfd_stderr = -1;
-
- close (fd);
- return G_SOURCE_REMOVE;
- }
-
- return G_SOURCE_CONTINUE;
-}
-
-static gboolean timeout_cb (G_GNUC_UNUSED gpointer user_data)
-{
- timed_out = TRUE;
- ninfo ("Timed out, killing main loop");
- g_main_loop_quit (main_loop);
- return G_SOURCE_REMOVE;
-}
-
-static gboolean oom_cb(int fd, GIOCondition condition, G_GNUC_UNUSED gpointer user_data)
-{
- uint64_t oom_event;
- ssize_t num_read = 0;
-
- if ((condition & G_IO_IN) != 0) {
- num_read = read(fd, &oom_event, sizeof(uint64_t));
- if (num_read < 0) {
- nwarn("Failed to read oom event from eventfd");
- return G_SOURCE_CONTINUE;
- }
-
- if (num_read > 0) {
- if (num_read != sizeof(uint64_t))
- nwarn("Failed to read full oom event from eventfd");
- ninfo("OOM received");
- if (open("oom", O_CREAT, 0666) < 0) {
- nwarn("Failed to write oom file");
- }
- return G_SOURCE_CONTINUE;
- }
- }
-
- /* End of input */
- close (fd);
- oom_event_fd = -1;
- return G_SOURCE_REMOVE;
-}
-
-static gboolean conn_sock_cb(int fd, GIOCondition condition, G_GNUC_UNUSED gpointer user_data)
-{
- #define CONN_SOCK_BUF_SIZE 32*1024 /* Match the write size in CopyDetachable */
- char buf[CONN_SOCK_BUF_SIZE];
- ssize_t num_read = 0;
-
- if ((condition & G_IO_IN) != 0) {
- num_read = read(fd, buf, CONN_SOCK_BUF_SIZE);
- if (num_read < 0)
- return G_SOURCE_CONTINUE;
-
- if (num_read > 0 && masterfd_stdin >= 0) {
- if (write_all(masterfd_stdin, buf, num_read) < 0) {
- nwarn("Failed to write to container stdin");
- }
- return G_SOURCE_CONTINUE;
- }
- }
-
- /* End of input */
- conn_sock_shutdown(SHUT_RD);
- if (masterfd_stdin >= 0 && opt_stdin) {
- close(masterfd_stdin);
- masterfd_stdin = -1;
- }
- return G_SOURCE_REMOVE;
-}
-
-static gboolean attach_cb(int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC_UNUSED gpointer user_data)
-{
- conn_sock = accept(fd, NULL, NULL);
- if (conn_sock == -1) {
- if (errno != EWOULDBLOCK)
- nwarn("Failed to accept client connection on attach socket");
- } else {
- conn_sock_readable = true;
- conn_sock_writable = true;
- g_unix_fd_add (conn_sock, G_IO_IN|G_IO_HUP|G_IO_ERR, conn_sock_cb, GINT_TO_POINTER(STDOUT_PIPE));
- ninfo("Accepted connection %d", conn_sock);
- }
-
- return G_SOURCE_CONTINUE;
-}
-
-static gboolean ctrl_cb(int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC_UNUSED gpointer user_data)
-{
- #define CTLBUFSZ 200
- static char ctlbuf[CTLBUFSZ];
- static int readsz = CTLBUFSZ - 1;
- static char *readptr = ctlbuf;
- ssize_t num_read = 0;
- int ctl_msg_type = -1;
- int height = -1;
- int width = -1;
- struct winsize ws;
- int ret;
-
- num_read = read(fd, readptr, readsz);
- if (num_read <= 0) {
- nwarn("Failed to read from control fd");
- return G_SOURCE_CONTINUE;
- }
-
- readptr[num_read] = '\0';
- ninfo("Got ctl message: %s\n", ctlbuf);
-
- char *beg = ctlbuf;
- char *newline = strchrnul(beg, '\n');
- /* Process each message which ends with a line */
- while (*newline != '\0') {
- ret = sscanf(ctlbuf, "%d %d %d\n", &ctl_msg_type, &height, &width);
- if (ret != 3) {
- nwarn("Failed to sscanf message");
- return G_SOURCE_CONTINUE;
- }
- ninfo("Message type: %d, Height: %d, Width: %d", ctl_msg_type, height, width);
- ret = ioctl(masterfd_stdout, TIOCGWINSZ, &ws);
- ninfo("Existing size: %d %d", ws.ws_row, ws.ws_col);
- ws.ws_row = height;
- ws.ws_col = width;
- ret = ioctl(masterfd_stdout, TIOCSWINSZ, &ws);
- if (ret == -1) {
- nwarn("Failed to set process pty terminal size");
- }
- beg = newline + 1;
- newline = strchrnul(beg, '\n');
- }
- if (num_read == (CTLBUFSZ - 1) && beg == ctlbuf) {
- /*
- * We did not find a newline in the entire buffer.
- * This shouldn't happen as our buffer is larger than
- * the message that we expect to receive.
- */
- nwarn("Could not find newline in entire buffer\n");
- } else if (*beg == '\0') {
- /* We exhausted all messages that were complete */
- readptr = ctlbuf;
- readsz = CTLBUFSZ - 1;
- } else {
- /*
- * We copy remaining data to beginning of buffer
- * and advance readptr after that.
- */
- int cp_rem = 0;
- do {
- ctlbuf[cp_rem++] = *beg++;
- } while (*beg != '\0');
- readptr = ctlbuf + cp_rem;
- readsz = CTLBUFSZ - 1 - cp_rem;
- }
-
- return G_SOURCE_CONTINUE;
-}
-
-static gboolean terminal_accept_cb(int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC_UNUSED gpointer user_data)
-{
- const char *csname = user_data;
- struct file_t console;
- int connfd = -1;
- struct termios tset;
-
- ninfo("about to accept from console_socket_fd: %d", fd);
- connfd = accept4(fd, NULL, NULL, SOCK_CLOEXEC);
- if (connfd < 0) {
- nwarn("Failed to accept console-socket connection");
- return G_SOURCE_CONTINUE;
- }
-
- /* Not accepting anything else. */
- close(fd);
- unlink(csname);
-
- /* We exit if this fails. */
- ninfo("about to recvfd from connfd: %d", connfd);
- console = recvfd(connfd);
-
- ninfo("console = {.name = '%s'; .fd = %d}", console.name, console.fd);
- free(console.name);
-
- /* We change the terminal settings to match kube settings */
- if (tcgetattr(console.fd, &tset) == -1)
- pexit("Failed to get console terminal settings");
-
- tset.c_oflag |= ONLCR;
-
- if (tcsetattr(console.fd, TCSANOW, &tset) == -1)
- pexit("Failed to set console terminal settings");
-
- /* We only have a single fd for both pipes, so we just treat it as
- * stdout. stderr is ignored. */
- masterfd_stdin = console.fd;
- masterfd_stdout = console.fd;
-
- /* Clean up everything */
- close(connfd);
-
- return G_SOURCE_CONTINUE;
-}
-
-static void
-runtime_exit_cb (G_GNUC_UNUSED GPid pid, int status, G_GNUC_UNUSED gpointer user_data)
-{
- runtime_status = status;
- g_main_loop_quit (main_loop);
-}
-
-static void
-container_exit_cb (G_GNUC_UNUSED GPid pid, int status, G_GNUC_UNUSED gpointer user_data)
-{
- ninfo("container %d exited with status %d\n", pid, status);
- container_status = status;
- g_main_loop_quit (main_loop);
-}
-
-static void write_sync_fd(int sync_pipe_fd, int res, const char *message)
-{
- _cleanup_free_ char *escaped_message = NULL;
- _cleanup_free_ char *json = NULL;
- const char *res_key;
- ssize_t len;
-
- if (sync_pipe_fd == -1)
- return;
-
- if (opt_exec)
- res_key = "exit_code";
- else
- res_key = "pid";
-
- if (message) {
- escaped_message = escape_json_string(message);
- json = g_strdup_printf ("{\"%s\": %d, \"message\": \"%s\"}\n", res_key, res, escaped_message);
- } else {
- json = g_strdup_printf ("{\"%s\": %d}\n", res_key, res);
- }
-
- len = strlen(json);
- if (write_all(sync_pipe_fd, json, len) != len) {
- pexit("Unable to send container stderr message to parent");
- }
-}
-
-static char *setup_console_socket(void)
-{
- struct sockaddr_un addr = {0};
- _cleanup_free_ const char *tmpdir = g_get_tmp_dir();
- _cleanup_free_ char *csname = g_build_filename(tmpdir, "conmon-term.XXXXXX", NULL);
- /*
- * Generate a temporary name. Is this unsafe? Probably, but we can
- * replace it with a rename(2) setup if necessary.
- */
-
- int unusedfd = g_mkstemp(csname);
- if (unusedfd < 0)
- pexit("Failed to generate random path for console-socket");
- close(unusedfd);
-
- addr.sun_family = AF_UNIX;
- strncpy(addr.sun_path, csname, sizeof(addr.sun_path)-1);
-
- ninfo("addr{sun_family=AF_UNIX, sun_path=%s}", addr.sun_path);
-
- /* Bind to the console socket path. */
- console_socket_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
- if (console_socket_fd < 0)
- pexit("Failed to create console-socket");
- if (fchmod(console_socket_fd, 0700))
- pexit("Failed to change console-socket permissions");
- /* XXX: This should be handled with a rename(2). */
- if (unlink(csname) < 0)
- pexit("Failed to unlink temporary random path");
- if (bind(console_socket_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0)
- pexit("Failed to bind to console-socket");
- if (listen(console_socket_fd, 128) < 0)
- pexit("Failed to listen on console-socket");
-
- return g_strdup(csname);
-}
-
-static char *setup_attach_socket(void)
-{
- _cleanup_free_ char *attach_sock_path = NULL;
- char *attach_symlink_dir_path;
- struct sockaddr_un attach_addr = {0};
- attach_addr.sun_family = AF_UNIX;
-
- /*
- * Create a symlink so we don't exceed unix domain socket
- * path length limit.
- */
- attach_symlink_dir_path = g_build_filename(opt_socket_path, opt_cuuid, NULL);
- if (unlink(attach_symlink_dir_path) == -1 && errno != ENOENT)
- pexit("Failed to remove existing symlink for attach socket directory");
-
- if (symlink(opt_bundle_path, attach_symlink_dir_path) == -1)
- pexit("Failed to create symlink for attach socket");
-
- attach_sock_path = g_build_filename(opt_socket_path, opt_cuuid, "attach", NULL);
- ninfo("attach sock path: %s", attach_sock_path);
-
- strncpy(attach_addr.sun_path, attach_sock_path, sizeof(attach_addr.sun_path) - 1);
- ninfo("addr{sun_family=AF_UNIX, sun_path=%s}", attach_addr.sun_path);
-
- /*
- * We make the socket non-blocking to avoid a race where client aborts connection
- * before the server gets a chance to call accept. In that scenario, the server
- * accept blocks till a new client connection comes in.
- */
- attach_socket_fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
- if (attach_socket_fd == -1)
- pexit("Failed to create attach socket");
-
- if (fchmod(attach_socket_fd, 0700))
- pexit("Failed to change attach socket permissions");
-
- if (bind(attach_socket_fd, (struct sockaddr *)&attach_addr, sizeof(struct sockaddr_un)) == -1)
- pexit("Failed to bind attach socket: %s", attach_sock_path);
-
- if (listen(attach_socket_fd, 10) == -1)
- pexit("Failed to listen on attach socket: %s", attach_sock_path);
-
- g_unix_fd_add (attach_socket_fd, G_IO_IN, attach_cb, NULL);
-
- return attach_symlink_dir_path;
-}
-
-static void setup_terminal_control_fifo()
-{
- _cleanup_free_ char *ctl_fifo_path = g_build_filename(opt_bundle_path, "ctl", NULL);
- ninfo("ctl fifo path: %s", ctl_fifo_path);
-
- /* Setup fifo for reading in terminal resize and other stdio control messages */
-
- if (mkfifo(ctl_fifo_path, 0666) == -1)
- pexit("Failed to mkfifo at %s", ctl_fifo_path);
-
- terminal_ctrl_fd = open(ctl_fifo_path, O_RDONLY|O_NONBLOCK|O_CLOEXEC);
- if (terminal_ctrl_fd == -1)
- pexit("Failed to open control fifo");
-
- /*
- * Open a dummy writer to prevent getting flood of POLLHUPs when
- * last writer closes.
- */
- int dummyfd = open(ctl_fifo_path, O_WRONLY|O_CLOEXEC);
- if (dummyfd == -1)
- pexit("Failed to open dummy writer for fifo");
-
- g_unix_fd_add (terminal_ctrl_fd, G_IO_IN, ctrl_cb, NULL);
-
- ninfo("terminal_ctrl_fd: %d", terminal_ctrl_fd);
-}
-
-static void setup_oom_handling(int container_pid)
-{
- /* Setup OOM notification for container process */
- _cleanup_free_ char *memory_cgroup_path = process_cgroup_subsystem_path(container_pid, "memory");
- _cleanup_close_ int cfd = -1;
- int ofd = -1; /* Not closed */
- if (!memory_cgroup_path) {
- nexit("Failed to get memory cgroup path");
- }
-
- _cleanup_free_ char *memory_cgroup_file_path = g_build_filename(memory_cgroup_path, "cgroup.event_control", NULL);
-
- if ((cfd = open(memory_cgroup_file_path, O_WRONLY | O_CLOEXEC)) == -1) {
- nwarn("Failed to open %s", memory_cgroup_file_path);
- return;
- }
-
- _cleanup_free_ char *memory_cgroup_file_oom_path = g_build_filename(memory_cgroup_path, "memory.oom_control", NULL);
- if ((ofd = open(memory_cgroup_file_oom_path, O_RDONLY | O_CLOEXEC)) == -1)
- pexit("Failed to open %s", memory_cgroup_file_oom_path);
-
- if ((oom_event_fd = eventfd(0, EFD_CLOEXEC)) == -1)
- pexit("Failed to create eventfd");
-
- _cleanup_free_ char *data = g_strdup_printf("%d %d", oom_event_fd, ofd);
- if (write_all(cfd, data, strlen(data)) < 0)
- pexit("Failed to write to cgroup.event_control");
-
- g_unix_fd_add (oom_event_fd, G_IO_IN, oom_cb, NULL);
-}
-
-int main(int argc, char *argv[])
-{
- int ret;
- char cwd[PATH_MAX];
- _cleanup_free_ char *default_pid_file = NULL;
- _cleanup_free_ char *csname = NULL;
- GError *err = NULL;
- _cleanup_free_ char *contents = NULL;
- int container_pid = -1;
- pid_t main_pid, create_pid;
- /* Used for !terminal cases. */
- int slavefd_stdin = -1;
- int slavefd_stdout = -1;
- int slavefd_stderr = -1;
- char buf[BUF_SIZE];
- int num_read;
- int sync_pipe_fd = -1;
- int start_pipe_fd = -1;
- GError *error = NULL;
- GOptionContext *context;
- GPtrArray *runtime_argv = NULL;
- _cleanup_close_ int dev_null_r = -1;
- _cleanup_close_ int dev_null_w = -1;
- int fds[2];
-
- main_loop = g_main_loop_new (NULL, FALSE);
-
- /* Command line parameters */
- context = g_option_context_new("- conmon utility");
- g_option_context_add_main_entries(context, opt_entries, "conmon");
- if (!g_option_context_parse(context, &argc, &argv, &error)) {
- g_print("option parsing failed: %s\n", error->message);
- exit(1);
- }
-
- if (opt_cid == NULL)
- nexit("Container ID not provided. Use --cid");
-
- if (!opt_exec && opt_cuuid == NULL)
- nexit("Container UUID not provided. Use --cuuid");
-
- if (opt_runtime_path == NULL)
- nexit("Runtime path not provided. Use --runtime");
-
- if (!opt_exec && opt_exit_dir == NULL)
- nexit("Container exit directory not provided. Use --exit-dir");
-
- if (opt_bundle_path == NULL && !opt_exec) {
- if (getcwd(cwd, sizeof(cwd)) == NULL) {
- nexit("Failed to get working directory");
- }
- opt_bundle_path = cwd;
- }
-
- dev_null_r = open("/dev/null", O_RDONLY | O_CLOEXEC);
- if (dev_null_r < 0)
- pexit("Failed to open /dev/null");
-
- dev_null_w = open("/dev/null", O_WRONLY | O_CLOEXEC);
- if (dev_null_w < 0)
- pexit("Failed to open /dev/null");
-
- if (opt_exec && opt_exec_process_spec == NULL) {
- nexit("Exec process spec path not provided. Use --exec-process-spec");
- }
-
- if (opt_pid_file == NULL) {
- default_pid_file = g_strdup_printf ("%s/pidfile-%s", cwd, opt_cid);
- opt_pid_file = default_pid_file;
- }
-
- if (opt_log_path == NULL)
- nexit("Log file path not provided. Use --log-path");
-
- start_pipe_fd = get_pipe_fd_from_env("_OCI_STARTPIPE");
- if (start_pipe_fd >= 0) {
- /* Block for an initial write to the start pipe before
- spawning any childred or exiting, to ensure the
- parent can put us in the right cgroup. */
- read(start_pipe_fd, buf, BUF_SIZE);
- close(start_pipe_fd);
- }
-
- /* In the create-container case we double-fork in
- order to disconnect from the parent, as we want to
- continue in a daemon-like way */
- main_pid = fork();
- if (main_pid < 0) {
- pexit("Failed to fork the create command");
- } else if (main_pid != 0) {
- exit(0);
- }
-
- /* Disconnect stdio from parent. We need to do this, because
- the parent is waiting for the stdout to end when the intermediate
- child dies */
- if (dup2(dev_null_r, STDIN_FILENO) < 0)
- pexit("Failed to dup over stdin");
- if (dup2(dev_null_w, STDOUT_FILENO) < 0)
- pexit("Failed to dup over stdout");
- if (dup2(dev_null_w, STDERR_FILENO) < 0)
- pexit("Failed to dup over stderr");
-
- /* Create a new session group */
- setsid();
-
- /* Environment variables */
- sync_pipe_fd = get_pipe_fd_from_env("_OCI_SYNCPIPE");
-
- /* Open the log path file. */
- log_fd = open(opt_log_path, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, 0600);
- if (log_fd < 0)
- pexit("Failed to open log file");
-
- /*
- * Set self as subreaper so we can wait for container process
- * and return its exit code.
- */
- ret = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
- if (ret != 0) {
- pexit("Failed to set as subreaper");
- }
-
- if (opt_terminal) {
- csname = setup_console_socket();
- } else {
-
- /*
- * Create a "fake" master fd so that we can use the same epoll code in
- * both cases. The slavefd_*s will be closed after we dup over
- * everything.
- *
- * We use pipes here because open(/dev/std{out,err}) will fail if we
- * used anything else (and it wouldn't be a good idea to create a new
- * pty pair in the host).
- */
-
- if (opt_stdin) {
- if (pipe2(fds, O_CLOEXEC) < 0)
- pexit("Failed to create !terminal stdin pipe");
-
- masterfd_stdin = fds[1];
- slavefd_stdin = fds[0];
- }
-
- if (pipe2(fds, O_CLOEXEC) < 0)
- pexit("Failed to create !terminal stdout pipe");
-
- masterfd_stdout = fds[0];
- slavefd_stdout = fds[1];
- }
-
- /* We always create a stderr pipe, because that way we can capture
- runc stderr messages before the tty is created */
- if (pipe2(fds, O_CLOEXEC) < 0)
- pexit("Failed to create stderr pipe");
-
- masterfd_stderr = fds[0];
- slavefd_stderr = fds[1];
-
- runtime_argv = g_ptr_array_new();
- add_argv(runtime_argv,
- opt_runtime_path,
- NULL);
-
- /* Generate the cmdline. */
- if (!opt_exec && opt_systemd_cgroup)
- add_argv(runtime_argv,
- "--systemd-cgroup",
- NULL);
-
- if (opt_exec) {
- add_argv(runtime_argv,
- "exec", "-d",
- "--pid-file", opt_pid_file,
- NULL);
- } else {
- add_argv(runtime_argv,
- "create",
- "--bundle", opt_bundle_path,
- "--pid-file", opt_pid_file,
- NULL);
- }
-
- if (!opt_exec && opt_no_pivot) {
- add_argv(runtime_argv,
- "--no-pivot",
- NULL);
- }
-
- if (csname != NULL) {
- add_argv(runtime_argv,
- "--console-socket", csname,
- NULL);
- }
-
- /* Set the exec arguments. */
- if (opt_exec) {
- add_argv(runtime_argv,
- "--process", opt_exec_process_spec,
- NULL);
- }
-
- /* Container name comes last. */
- add_argv(runtime_argv, opt_cid, NULL);
- end_argv(runtime_argv);
-
- /*
- * We have to fork here because the current runC API dups the stdio of the
- * calling process over the container's fds. This is actually *very bad*
- * but is currently being discussed for change in
- * https://github.com/opencontainers/runtime-spec/pull/513. Hopefully this
- * won't be the case for very long.
- */
-
- /* Create our container. */
- create_pid = fork();
- if (create_pid < 0) {
- pexit("Failed to fork the create command");
- } else if (!create_pid) {
- /* FIXME: This results in us not outputting runc error messages to crio's log. */
- if (slavefd_stdin < 0)
- slavefd_stdin = dev_null_r;
- if (dup2(slavefd_stdin, STDIN_FILENO) < 0)
- pexit("Failed to dup over stdout");
-
- if (slavefd_stdout < 0)
- slavefd_stdout = dev_null_w;
- if (dup2(slavefd_stdout, STDOUT_FILENO) < 0)
- pexit("Failed to dup over stdout");
-
- if (slavefd_stderr < 0)
- slavefd_stderr = slavefd_stdout;
- if (dup2(slavefd_stderr, STDERR_FILENO) < 0)
- pexit("Failed to dup over stderr");
-
- execv(g_ptr_array_index(runtime_argv,0), (char **)runtime_argv->pdata);
- exit(127);
- }
-
- g_ptr_array_free (runtime_argv, TRUE);
-
- /* The runtime has that fd now. We don't need to touch it anymore. */
- close(slavefd_stdin);
- close(slavefd_stdout);
- close(slavefd_stderr);
-
- /* Map pid to its handler. */
- GHashTable *pid_to_handler = g_hash_table_new (g_int_hash, g_int_equal);
- g_hash_table_insert (pid_to_handler, &create_pid, runtime_exit_cb);
-
- /*
- * Glib does not support SIGCHLD so use SIGUSR1 with the same semantic. We will
- * catch SIGCHLD and raise(SIGUSR1) in the signal handler.
- */
- g_unix_signal_add (SIGUSR1, on_sigusr1_cb, pid_to_handler);
-
- if (signal(SIGCHLD, on_sigchld) == SIG_ERR)
- pexit("Failed to set handler for SIGCHLD");
-
- ninfo("about to waitpid: %d", create_pid);
- if (csname != NULL) {
- guint terminal_watch = g_unix_fd_add (console_socket_fd, G_IO_IN, terminal_accept_cb, csname);
- /* Process any SIGCHLD we may have missed before the signal handler was in place. */
- check_child_processes (pid_to_handler);
- g_main_loop_run (main_loop);
- g_source_remove (terminal_watch);
- } else {
- int ret;
- /* Wait for our create child to exit with the return code. */
- do
- ret = waitpid(create_pid, &runtime_status, 0);
- while (ret < 0 && errno == EINTR);
- if (ret < 0) {
- int old_errno = errno;
- kill(create_pid, SIGKILL);
- errno = old_errno;
- pexit("Failed to wait for `runtime %s`", opt_exec ? "exec" : "create");
- }
-
- }
-
- if (!WIFEXITED(runtime_status) || WEXITSTATUS(runtime_status) != 0) {
- if (sync_pipe_fd > 0) {
- /*
- * Read from container stderr for any error and send it to parent
- * We send -1 as pid to signal to parent that create container has failed.
- */
- num_read = read(masterfd_stderr, buf, BUF_SIZE);
- if (num_read > 0) {
- buf[num_read] = '\0';
- write_sync_fd(sync_pipe_fd, -1, buf);
- }
- }
- nexit("Failed to create container: exit status %d", WEXITSTATUS(runtime_status));
- }
-
- if (opt_terminal && masterfd_stdout == -1)
- nexit("Runtime did not set up terminal");
-
- /* Read the pid so we can wait for the process to exit */
- g_file_get_contents(opt_pid_file, &contents, NULL, &err);
- if (err) {
- nwarn("Failed to read pidfile: %s", err->message);
- g_error_free(err);
- exit(1);
- }
-
- container_pid = atoi(contents);
- ninfo("container PID: %d", container_pid);
-
- g_hash_table_insert (pid_to_handler, &container_pid, container_exit_cb);
-
- /* Setup endpoint for attach */
- _cleanup_free_ char *attach_symlink_dir_path = NULL;
- if (!opt_exec) {
- attach_symlink_dir_path = setup_attach_socket();
- }
-
- if (!opt_exec) {
- setup_terminal_control_fifo();
- }
-
- /* Send the container pid back to parent */
- if (!opt_exec) {
- write_sync_fd(sync_pipe_fd, container_pid, NULL);
- }
-
- setup_oom_handling(container_pid);
-
- if (masterfd_stdout >= 0) {
- g_unix_fd_add (masterfd_stdout, G_IO_IN, stdio_cb, GINT_TO_POINTER(STDOUT_PIPE));
- }
- if (masterfd_stderr >= 0) {
- g_unix_fd_add (masterfd_stderr, G_IO_IN, stdio_cb, GINT_TO_POINTER(STDERR_PIPE));
- }
-
- if (opt_timeout > 0) {
- g_timeout_add_seconds (opt_timeout, timeout_cb, NULL);
- }
-
- check_child_processes(pid_to_handler);
-
- g_main_loop_run (main_loop);
-
- /* Drain stdout and stderr */
- if (masterfd_stdout != -1) {
- g_unix_set_fd_nonblocking(masterfd_stdout, TRUE, NULL);
- while (read_stdio(masterfd_stdout, STDOUT_PIPE, NULL))
- ;
- }
- if (masterfd_stderr != -1) {
- g_unix_set_fd_nonblocking(masterfd_stderr, TRUE, NULL);
- while (read_stdio(masterfd_stderr, STDERR_PIPE, NULL))
- ;
- }
-
- int exit_status = -1;
- const char *exit_message = NULL;
-
- if (timed_out) {
- kill(container_pid, SIGKILL);
- exit_message = "command timed out";
- } else {
- exit_status = WEXITSTATUS(container_status);
- }
-
- if (!opt_exec) {
- _cleanup_free_ char *status_str = g_strdup_printf("%d", exit_status);
- _cleanup_free_ char *exit_file_path = g_build_filename(opt_exit_dir, opt_cid, NULL);
- if (!g_file_set_contents(exit_file_path, status_str, -1, &err))
- nexit("Failed to write %s to exit file: %s\n",
- status_str, err->message);
- } else {
- /* Send the command exec exit code back to the parent */
- write_sync_fd(sync_pipe_fd, exit_status, exit_message);
- }
-
- if (attach_symlink_dir_path != NULL &&
- unlink(attach_symlink_dir_path) == -1 && errno != ENOENT) {
- pexit("Failed to remove symlink for attach socket directory");
- }
-
- return EXIT_SUCCESS;
-}
diff --git a/install.md b/install.md
index 498731c20..0973088d1 100644
--- a/install.md
+++ b/install.md
@@ -6,6 +6,10 @@
The latest version of `runc` is expected to be installed on the system. It is picked up as the default runtime by podman.
+#### conmon installed
+
+The latest version of `conmon` is expected to be installed on the system. Conmon is used to monitor OCI runtimes.
+
#### Setup CNI networking
A proper description of setting up CNI networking is given in the
@@ -22,6 +26,7 @@ Fedora, CentOS, RHEL, and related distributions:
```bash
yum install -y \
btrfs-progs-devel \
+ conmon \
device-mapper-devel \
git \
glib2-devel \
@@ -45,6 +50,7 @@ Debian, Ubuntu, and related distributions:
```bash
apt-get install -y \
btrfs-tools \
+ cri-o \
git \
golang-go \
libassuan-dev \
diff --git a/test/helpers.bash b/test/helpers.bash
index 16f3483f6..222e7bb26 100644
--- a/test/helpers.bash
+++ b/test/helpers.bash
@@ -18,12 +18,14 @@ else
fi
PODMAN_BINARY=${PODMAN_BINARY:-${CRIO_ROOT}/bin/podman}
-# Path of the conmon binary.
-CONMON_BINARY=${CONMON_BINARY:-${CRIO_ROOT}/bin/conmon}
# Path of the default seccomp profile.
SECCOMP_PROFILE=${SECCOMP_PROFILE:-${CRIO_ROOT}/seccomp.json}
# Name of the default apparmor profile.
APPARMOR_PROFILE=${APPARMOR_PROFILE:-crio-default}
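+# Path of the conmon binary: $CONMON (default "conmon") is looked up in PATH; /usr/libexec/crio/conmon is used if it is not found.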
+CONMON=${CONMON:-conmon}
+CONMON_PATH=$(command -v $CONMON || true)
+CONMON_BINARY=${CONMON_PATH:-/usr/libexec/crio/conmon}
# Runtime
RUNTIME=${RUNTIME:-runc}
RUNTIME_PATH=$(command -v $RUNTIME || true)
@@ -116,8 +118,6 @@ cp ${CRIO_ROOT}/cni/* ${LIBPOD_CNI_CONFIG}
PODMAN_OPTIONS="--root $TESTDIR/crio $STORAGE_OPTIONS --runroot $TESTDIR/crio-run --runtime ${RUNTIME_BINARY} --conmon ${CONMON_BINARY} --cni-config-dir ${LIBPOD_CNI_CONFIG}"
-cp "$CONMON_BINARY" "$TESTDIR/conmon"
-
PATH=$PATH:$TESTDIR
for key in ${!IMAGES[@]}; do