8266490: Extend the OSContainer API to support the pids controller of cgroups

Reviewed-by: sgehwolf, lucy
This commit is contained in:
Matthias Baesken 2021-08-10 07:40:21 +00:00
parent 2384e12888
commit 089e83bf1b
22 changed files with 527 additions and 79 deletions

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,11 +34,15 @@
#include "runtime/os.hpp"
#include "utilities/globalDefinitions.hpp"
// controller names have to match the *_IDX indices
static const char* cg_controller_name[] = { "cpu", "cpuset", "cpuacct", "memory", "pids" };
CgroupSubsystem* CgroupSubsystemFactory::create() {
CgroupV1MemoryController* memory = NULL;
CgroupV1Controller* cpuset = NULL;
CgroupV1Controller* cpu = NULL;
CgroupV1Controller* cpuacct = NULL;
CgroupV1Controller* pids = NULL;
CgroupInfo cg_infos[CG_INFO_LENGTH];
u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
const char* proc_cgroups = "/proc/cgroups";
@ -93,22 +97,29 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected");
for (int i = 0; i < CG_INFO_LENGTH; i++) {
CgroupInfo info = cg_infos[i];
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct->set_subsystem_path(info._cgroup_path);
if (info._data_complete) { // pids controller might have incomplete data
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "pids") == 0) {
pids = new CgroupV1Controller(info._root_mount_path, info._mount_path);
pids->set_subsystem_path(info._cgroup_path);
}
} else {
log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]);
}
}
cleanup(cg_infos);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory);
}
bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
@ -122,9 +133,10 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
char buf[MAXPATHLEN+1];
char *p;
bool is_cgroupsV2;
// true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
// true iff all required controllers, memory, cpu, cpuset, cpuacct are enabled
// at the kernel level.
bool all_controllers_enabled;
// pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1)
bool all_required_controllers_enabled;
/*
* Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
@ -136,10 +148,9 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
*/
cgroups = fopen(proc_cgroups, "r");
if (cgroups == NULL) {
log_debug(os, container)("Can't open %s, %s",
proc_cgroups, os::strerror(errno));
*flags = INVALID_CGROUPS_GENERIC;
return false;
log_debug(os, container)("Can't open %s, %s", proc_cgroups, os::strerror(errno));
*flags = INVALID_CGROUPS_GENERIC;
return false;
}
while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
@ -167,19 +178,30 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._name = os::strdup(name);
cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
} else if (strcmp(name, "pids") == 0) {
log_debug(os, container)("Detected optional pids controller entry in %s", proc_cgroups);
cg_infos[PIDS_IDX]._name = os::strdup(name);
cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
cg_infos[PIDS_IDX]._enabled = (enabled == 1);
}
}
fclose(cgroups);
is_cgroupsV2 = true;
all_controllers_enabled = true;
all_required_controllers_enabled = true;
for (int i = 0; i < CG_INFO_LENGTH; i++) {
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
// pids controller is optional. All other controllers are required
if (i != PIDS_IDX) {
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
}
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
log_debug(os, container)("controller %s is not enabled\n", cg_controller_name[i]);
}
}
if (!all_controllers_enabled) {
// one or more controllers disabled, disable container support
if (!all_required_controllers_enabled) {
// one or more required controllers disabled, disable container support
log_debug(os, container)("One or more required controllers disabled at kernel level.");
cleanup(cg_infos);
*flags = INVALID_CGROUPS_GENERIC;
@ -220,17 +242,21 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
if (strcmp(token, "memory") == 0) {
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpuset") == 0) {
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset");
cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpu") == 0) {
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpuacct") == 0) {
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacc");
cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "pids") == 0) {
assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
}
}
if (is_cgroupsV2) {
@ -281,13 +307,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
/* Cgroup v1 relevant info
*
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids
*
* Example for docker:
* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
*
* Example for host:
* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
*
* 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
*/
if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
if (strcmp("cgroup", tmp_fs_type) != 0) {
@ -333,6 +361,12 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[CPUACCT_IDX]._data_complete = true;
} else if (strcmp(token, "pids") == 0) {
any_cgroup_mounts_found = true;
assert(cg_infos[PIDS_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[PIDS_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[PIDS_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[PIDS_IDX]._data_complete = true;
}
}
}
@ -387,10 +421,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
*flags = INVALID_CGROUPS_V1;
return false;
}
if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) {
log_debug(os, container)("Optional cgroup v1 pids subsystem not found");
// keep the other controller info, pids is optional
}
// Cgroups v1 case, we have all the info we need.
*flags = CGROUPS_V1;
return true;
};
void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
@ -514,3 +551,22 @@ jlong CgroupSubsystem::memory_limit_in_bytes() {
memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
return mem_limit;
}
jlong CgroupSubsystem::limit_from_str(char* limit_str) {
if (limit_str == NULL) {
return OSCONTAINER_ERROR;
}
// Unlimited memory in cgroups is the literal string 'max' for
// some controllers, for example the pids controller.
if (strcmp("max", limit_str) == 0) {
os::free(limit_str);
return (jlong)-1;
}
julong limit;
if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) {
os::free(limit_str);
return OSCONTAINER_ERROR;
}
os::free(limit_str);
return (jlong)limit;
}