mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-16 09:04:41 +02:00
8230305: Cgroups v2: Container awareness
Implement Cgroups v2 container awareness in hotspot Reviewed-by: bobv, dholmes
This commit is contained in:
parent
71340f51fa
commit
d462a6b5c9
10 changed files with 1425 additions and 638 deletions
421
src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Normal file
421
src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Normal file
|
@ -0,0 +1,421 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include "cgroupSubsystem_linux.hpp"
|
||||||
|
#include "cgroupV1Subsystem_linux.hpp"
|
||||||
|
#include "cgroupV2Subsystem_linux.hpp"
|
||||||
|
#include "logging/log.hpp"
|
||||||
|
#include "memory/allocation.hpp"
|
||||||
|
#include "runtime/globals.hpp"
|
||||||
|
#include "runtime/os.hpp"
|
||||||
|
#include "utilities/globalDefinitions.hpp"
|
||||||
|
|
||||||
|
/*
 * create
 *
 * Detect which cgroup flavour this process runs under and build the
 * matching subsystem object.
 *
 * The detection reads three proc files:
 *
 *   /proc/cgroups         hierarchy ID and enabled flag of the cpuset, cpu,
 *                         cpuacct and memory controllers. If all four have
 *                         hierarchy ID 0 the system is treated as cgroups v2.
 *   /proc/self/cgroup     cgroup path of this process (per controller on
 *                         cgroups v1, a single path on cgroups v2).
 *   /proc/self/mountinfo  mount point(s) of the cgroup file system(s).
 *
 * return:
 *    a new CgroupV2Subsystem or CgroupV1Subsystem, or
 *    NULL if one of the proc files cannot be opened, a required controller
 *    is missing or disabled, or no usable mount point / cgroup path is found
 */
CgroupSubsystem* CgroupSubsystemFactory::create() {
  CgroupV1MemoryController* memory = NULL;
  CgroupV1Controller* cpuset = NULL;
  CgroupV1Controller* cpu = NULL;
  CgroupV1Controller* cpuacct = NULL;
  FILE *mntinfo = NULL;
  FILE *cgroups = NULL;
  FILE *cgroup = NULL;
  char buf[MAXPATHLEN+1];
  char tmproot[MAXPATHLEN+1];
  char tmpmount[MAXPATHLEN+1];
  char *p;
  bool is_cgroupsV2;
  // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
  // at the kernel level.
  bool all_controllers_enabled;

  CgroupInfo cg_infos[CG_INFO_LENGTH];
  int cpuset_idx  = 0;
  int cpu_idx     = 1;
  int cpuacct_idx = 2;
  int memory_idx  = 3;

  // Put every entry into a known state up front. A controller that does not
  // show up in the proc files must be seen as "absent/disabled" below rather
  // than being read as indeterminate data.
  for (int i = 0; i < CG_INFO_LENGTH; i++) {
    cg_infos[i]._name = NULL;
    cg_infos[i]._hierarchy_id = -1;
    cg_infos[i]._enabled = false;
    cg_infos[i]._cgroup_path = NULL;
  }

  /*
   * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
   *
   * For a cgroups v1 hierarchy (hybrid or legacy) the cpu, cpuacct, cpuset
   * and memory controllers have a non-zero hierarchy ID field. Only when all
   * of them report hierarchy ID 0 is the system treated as cgroups v2.
   */
  cgroups = fopen("/proc/cgroups", "r");
  if (cgroups == NULL) {
    log_debug(os, container)("Can't open /proc/cgroups, %s",
                             os::strerror(errno));
    return NULL;
  }

  while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
    char name[MAXPATHLEN+1];
    int  hierarchy_id;
    int  enabled;
    int  idx = -1;

    // Format of /proc/cgroups documented via man 7 cgroups
    if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
      continue;
    }
    if (strcmp(name, "memory") == 0) {
      idx = memory_idx;
    } else if (strcmp(name, "cpuset") == 0) {
      idx = cpuset_idx;
    } else if (strcmp(name, "cpu") == 0) {
      idx = cpu_idx;
    } else if (strcmp(name, "cpuacct") == 0) {
      idx = cpuacct_idx;
    }
    if (idx >= 0) {
      // Release a previously recorded name so a repeated entry cannot leak.
      os::free(cg_infos[idx]._name);
      cg_infos[idx]._name = os::strdup(name);
      cg_infos[idx]._hierarchy_id = hierarchy_id;
      cg_infos[idx]._enabled = (enabled == 1);
    }
  }
  fclose(cgroups);

  is_cgroupsV2 = true;
  all_controllers_enabled = true;
  for (int i = 0; i < CG_INFO_LENGTH; i++) {
    is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
    all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
  }

  if (!all_controllers_enabled) {
    // one or more controllers disabled, disable container support
    log_debug(os, container)("One or more required controllers disabled at kernel level.");
    return NULL;
  }

  /*
   * Read /proc/self/cgroup and determine:
   *  - the cgroup path for cgroups v2 or
   *  - on a cgroups v1 system, collect info for mapping
   *    the host mount point to the local one via /proc/self/mountinfo below.
   */
  cgroup = fopen("/proc/self/cgroup", "r");
  if (cgroup == NULL) {
    log_debug(os, container)("Can't open /proc/self/cgroup, %s",
                             os::strerror(errno));
    return NULL;
  }

  while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
    char *controllers;
    char *token;
    char *hierarchy_id_str;
    int  hierarchy_id;
    char *cgroup_path;

    // Line format: hierarchy-ID:controller-list:cgroup-path
    hierarchy_id_str = strsep(&p, ":");
    hierarchy_id = atoi(hierarchy_id_str);
    /* Get controllers and base */
    controllers = strsep(&p, ":");
    cgroup_path = strsep(&p, "\n");

    // Skip malformed lines: both remaining fields are needed below and
    // cgroup_path must not be NULL when it is duplicated.
    if (controllers == NULL || cgroup_path == NULL) {
      continue;
    }

    if (is_cgroupsV2) {
      // The unified (v2) hierarchy always has hierarchy ID 0. Lines with a
      // different ID belong to named v1 hierarchies and carry no v2 path.
      if (hierarchy_id != 0) {
        continue;
      }
      for (int i = 0; i < CG_INFO_LENGTH; i++) {
        os::free(cg_infos[i]._cgroup_path);
        cg_infos[i]._cgroup_path = os::strdup(cgroup_path);
      }
      continue;
    }

    while ((token = strsep(&controllers, ",")) != NULL) {
      int idx = -1;
      if (strcmp(token, "memory") == 0) {
        idx = memory_idx;
      } else if (strcmp(token, "cpuset") == 0) {
        idx = cpuset_idx;
      } else if (strcmp(token, "cpu") == 0) {
        idx = cpu_idx;
      } else if (strcmp(token, "cpuacct") == 0) {
        idx = cpuacct_idx;
      }
      if (idx >= 0) {
        assert(hierarchy_id == cg_infos[idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
        os::free(cg_infos[idx]._cgroup_path);
        cg_infos[idx]._cgroup_path = os::strdup(cgroup_path);
      }
    }
  }
  fclose(cgroup);

  if (is_cgroupsV2) {
    if (cg_infos[memory_idx]._cgroup_path == NULL) {
      // No usable line in /proc/self/cgroup; there is no path to hand to
      // the controller.
      log_debug(os, container)("Cgroup path for cgroups v2 not found in /proc/self/cgroup");
      return NULL;
    }

    // Find the cgroup2 mount point by reading /proc/self/mountinfo
    mntinfo = fopen("/proc/self/mountinfo", "r");
    if (mntinfo == NULL) {
      log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
                               os::strerror(errno));
      return NULL;
    }

    char cgroupv2_mount[MAXPATHLEN+1];
    char fstype[MAXPATHLEN+1];
    bool mount_point_found = false;
    while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
      char *tmp_mount_point = cgroupv2_mount;
      char *tmp_fs_type = fstype;

      // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
      if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) {
        // we likely have an early match return, be sure we have cgroup2 as fstype
        if (strcmp("cgroup2", tmp_fs_type) == 0) {
          mount_point_found = true;
          break;
        }
      }
    }
    fclose(mntinfo);
    if (!mount_point_found) {
      log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
      return NULL;
    }
    // Cgroups v2 case, we have all the info we need.
    // Construct the subsystem, free resources and return
    // Note: any index in cg_infos will do as the path is the same for
    //       all controllers.
    CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path);
    for (int i = 0; i < CG_INFO_LENGTH; i++) {
      os::free(cg_infos[i]._name);
      os::free(cg_infos[i]._cgroup_path);
    }
    log_debug(os, container)("Detected cgroups v2 unified hierarchy");
    return new CgroupV2Subsystem(unified);
  }

  // What follows is cgroups v1
  log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers");

  /*
   * Find the cgroup mount point for memory and cpuset
   * by reading /proc/self/mountinfo
   *
   * Example for docker:
   * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
   *
   * Example for host:
   * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
   */
  mntinfo = fopen("/proc/self/mountinfo", "r");
  if (mntinfo == NULL) {
    log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
                             os::strerror(errno));
    return NULL;
  }

  while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
    char tmpcgroups[MAXPATHLEN+1];
    char *cptr = tmpcgroups;
    char *token;

    // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
    if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
      continue;
    }
    // The last field is the comma separated super option list, which names
    // the controller(s) attached to this mount.
    while ((token = strsep(&cptr, ",")) != NULL) {
      if (strcmp(token, "memory") == 0) {
        memory = new CgroupV1MemoryController(tmproot, tmpmount);
      } else if (strcmp(token, "cpuset") == 0) {
        cpuset = new CgroupV1Controller(tmproot, tmpmount);
      } else if (strcmp(token, "cpu") == 0) {
        cpu = new CgroupV1Controller(tmproot, tmpmount);
      } else if (strcmp(token, "cpuacct") == 0) {
        cpuacct= new CgroupV1Controller(tmproot, tmpmount);
      }
    }
  }

  fclose(mntinfo);

  if (memory == NULL) {
    log_debug(os, container)("Required cgroup v1 memory subsystem not found");
    return NULL;
  }
  if (cpuset == NULL) {
    log_debug(os, container)("Required cgroup v1 cpuset subsystem not found");
    return NULL;
  }
  if (cpu == NULL) {
    log_debug(os, container)("Required cgroup v1 cpu subsystem not found");
    return NULL;
  }
  if (cpuacct == NULL) {
    log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found");
    return NULL;
  }

  /*
   * Use info gathered previously from /proc/self/cgroup
   * and map host mount point to
   * local one via /proc/self/mountinfo content above
   *
   * Docker example:
   * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
   *
   * Host example:
   * 5:memory:/user.slice
   *
   * Construct a path to the process specific memory and cpuset
   * cgroup directory.
   *
   * For a container running under Docker from memory example above
   * the paths would be:
   *
   * /sys/fs/cgroup/memory
   *
   * For a Host from memory example above the path would be:
   *
   * /sys/fs/cgroup/memory/user.slice
   *
   */
  // Each entry sits at its fixed index, so the controllers can be addressed
  // directly. A NULL path (controller not listed in /proc/self/cgroup) is
  // tolerated by set_subsystem_path().
  cpuset->set_subsystem_path(cg_infos[cpuset_idx]._cgroup_path);
  cpu->set_subsystem_path(cg_infos[cpu_idx]._cgroup_path);
  cpuacct->set_subsystem_path(cg_infos[cpuacct_idx]._cgroup_path);
  memory->set_subsystem_path(cg_infos[memory_idx]._cgroup_path);

  // set_subsystem_path() stores its own copy of the resulting path, so the
  // temporary strings can be released here, as on the cgroups v2 path.
  for (int i = 0; i < CG_INFO_LENGTH; i++) {
    os::free(cg_infos[i]._name);
    os::free(cg_infos[i]._cgroup_path);
  }
  return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
}
|
||||||
|
|
||||||
|
/* active_processor_count
|
||||||
|
*
|
||||||
|
* Calculate an appropriate number of active processors for the
|
||||||
|
* VM to use based on these three inputs.
|
||||||
|
*
|
||||||
|
* cpu affinity
|
||||||
|
* cgroup cpu quota & cpu period
|
||||||
|
* cgroup cpu shares
|
||||||
|
*
|
||||||
|
* Algorithm:
|
||||||
|
*
|
||||||
|
* Determine the number of available CPUs from sched_getaffinity
|
||||||
|
*
|
||||||
|
* If user specified a quota (quota != -1), calculate the number of
|
||||||
|
* required CPUs by dividing quota by period.
|
||||||
|
*
|
||||||
|
* If shares are in effect (shares != -1), calculate the number
|
||||||
|
* of CPUs required for the shares by dividing the share value
|
||||||
|
* by PER_CPU_SHARES.
|
||||||
|
*
|
||||||
|
* All results of division are rounded up to the next whole number.
|
||||||
|
*
|
||||||
|
* If neither shares or quotas have been specified, return the
|
||||||
|
* number of active processors in the system.
|
||||||
|
*
|
||||||
|
* If both shares and quotas have been specified, the results are
|
||||||
|
* based on the flag PreferContainerQuotaForCPUCount. If true,
|
||||||
|
* return the quota value. If false return the smallest value
|
||||||
|
* between shares or quotas.
|
||||||
|
*
|
||||||
|
* If shares and/or quotas have been specified, the resulting number
|
||||||
|
* returned will never exceed the number of active processors.
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* number of CPUs
|
||||||
|
*/
|
||||||
|
int CgroupSubsystem::active_processor_count() {
|
||||||
|
int quota_count = 0, share_count = 0;
|
||||||
|
int cpu_count, limit_count;
|
||||||
|
int result;
|
||||||
|
|
||||||
|
// We use a cache with a timeout to avoid performing expensive
|
||||||
|
// computations in the event this function is called frequently.
|
||||||
|
// [See 8227006].
|
||||||
|
CachingCgroupController* contrl = cpu_controller();
|
||||||
|
CachedMetric* cpu_limit = contrl->metrics_cache();
|
||||||
|
if (!cpu_limit->should_check_metric()) {
|
||||||
|
int val = (int)cpu_limit->value();
|
||||||
|
log_trace(os, container)("CgroupSubsystem::active_processor_count (cached): %d", val);
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
cpu_count = limit_count = os::Linux::active_processor_count();
|
||||||
|
int quota = cpu_quota();
|
||||||
|
int period = cpu_period();
|
||||||
|
int share = cpu_shares();
|
||||||
|
|
||||||
|
if (quota > -1 && period > 0) {
|
||||||
|
quota_count = ceilf((float)quota / (float)period);
|
||||||
|
log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count);
|
||||||
|
}
|
||||||
|
if (share > -1) {
|
||||||
|
share_count = ceilf((float)share / (float)PER_CPU_SHARES);
|
||||||
|
log_trace(os, container)("CPU Share count based on shares: %d", share_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If both shares and quotas are setup results depend
|
||||||
|
// on flag PreferContainerQuotaForCPUCount.
|
||||||
|
// If true, limit CPU count to quota
|
||||||
|
// If false, use minimum of shares and quotas
|
||||||
|
if (quota_count !=0 && share_count != 0) {
|
||||||
|
if (PreferContainerQuotaForCPUCount) {
|
||||||
|
limit_count = quota_count;
|
||||||
|
} else {
|
||||||
|
limit_count = MIN2(quota_count, share_count);
|
||||||
|
}
|
||||||
|
} else if (quota_count != 0) {
|
||||||
|
limit_count = quota_count;
|
||||||
|
} else if (share_count != 0) {
|
||||||
|
limit_count = share_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = MIN2(cpu_count, limit_count);
|
||||||
|
log_trace(os, container)("OSContainer::active_processor_count: %d", result);
|
||||||
|
|
||||||
|
// Update cached metric to avoid re-reading container settings too often
|
||||||
|
cpu_limit->set_value(result, OSCONTAINER_CACHE_TIMEOUT);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* memory_limit_in_bytes
|
||||||
|
*
|
||||||
|
* Return the limit of available memory for this process.
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* memory limit in bytes or
|
||||||
|
* -1 for unlimited
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
jlong CgroupSubsystem::memory_limit_in_bytes() {
|
||||||
|
CachingCgroupController* contrl = memory_controller();
|
||||||
|
CachedMetric* memory_limit = contrl->metrics_cache();
|
||||||
|
if (!memory_limit->should_check_metric()) {
|
||||||
|
return memory_limit->value();
|
||||||
|
}
|
||||||
|
jlong mem_limit = read_memory_limit_in_bytes();
|
||||||
|
// Update cached metric to avoid re-reading container settings too often
|
||||||
|
memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
|
||||||
|
return mem_limit;
|
||||||
|
}
|
264
src/hotspot/os/linux/cgroupSubsystem_linux.hpp
Normal file
264
src/hotspot/os/linux/cgroupSubsystem_linux.hpp
Normal file
|
@ -0,0 +1,264 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CGROUP_SUBSYSTEM_LINUX_HPP
|
||||||
|
#define CGROUP_SUBSYSTEM_LINUX_HPP
|
||||||
|
|
||||||
|
#include "memory/allocation.hpp"
|
||||||
|
#include "runtime/os.hpp"
|
||||||
|
#include "logging/log.hpp"
|
||||||
|
#include "utilities/globalDefinitions.hpp"
|
||||||
|
#include "utilities/macros.hpp"
|
||||||
|
#include "osContainer_linux.hpp"
|
||||||
|
|
||||||
|
// Shared cgroups code (used by cgroup version 1 and version 2)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PER_CPU_SHARES has been set to 1024 because CPU shares' quota
|
||||||
|
* is commonly used in cloud frameworks like Kubernetes[1],
|
||||||
|
* AWS[2] and Mesos[3] in a similar way. They spawn containers with
|
||||||
|
* --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
|
||||||
|
* the inverse for determining the number of possible available
|
||||||
|
* CPUs to the JVM inside a container. See JDK-8216366.
|
||||||
|
*
|
||||||
|
* [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
|
||||||
|
* In particular:
|
||||||
|
* When using Docker:
|
||||||
|
* The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
|
||||||
|
* fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
|
||||||
|
* --cpu-shares flag in the docker run command.
|
||||||
|
* [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
|
||||||
|
* [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
|
||||||
|
* https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
|
||||||
|
*/
|
||||||
|
#define PER_CPU_SHARES 1024
|
||||||
|
|
||||||
|
typedef char * cptr;
|
||||||
|
|
||||||
|
// Base class of the objects that know where the interface files of a cgroup
// controller live.
class CgroupController: public CHeapObj<mtInternal> {
  public:
    // Directory under which this controller's interface files are looked up;
    // file readers append the file name to it and treat a NULL result as an
    // error.
    //
    // Pure virtual: this base class provides no implementation, and a
    // non-pure virtual function without a definition is an ODR violation
    // that can surface as an undefined vtable/function reference at link
    // time.
    virtual char *subsystem_path() = 0;
};
|
||||||
|
|
||||||
|
PRAGMA_DIAG_PUSH
|
||||||
|
PRAGMA_FORMAT_NONLITERAL_IGNORED
|
||||||
|
template <typename T> int subsystem_file_line_contents(CgroupController* c,
|
||||||
|
const char *filename,
|
||||||
|
const char *matchline,
|
||||||
|
const char *scan_fmt,
|
||||||
|
T returnval) {
|
||||||
|
FILE *fp = NULL;
|
||||||
|
char *p;
|
||||||
|
char file[MAXPATHLEN+1];
|
||||||
|
char buf[MAXPATHLEN+1];
|
||||||
|
char discard[MAXPATHLEN+1];
|
||||||
|
bool found_match = false;
|
||||||
|
|
||||||
|
if (c == NULL) {
|
||||||
|
log_debug(os, container)("subsystem_file_line_contents: CgroupController* is NULL");
|
||||||
|
return OSCONTAINER_ERROR;
|
||||||
|
}
|
||||||
|
if (c->subsystem_path() == NULL) {
|
||||||
|
log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");
|
||||||
|
return OSCONTAINER_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
strncpy(file, c->subsystem_path(), MAXPATHLEN);
|
||||||
|
file[MAXPATHLEN-1] = '\0';
|
||||||
|
int filelen = strlen(file);
|
||||||
|
if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
|
||||||
|
log_debug(os, container)("File path too long %s, %s", file, filename);
|
||||||
|
return OSCONTAINER_ERROR;
|
||||||
|
}
|
||||||
|
strncat(file, filename, MAXPATHLEN-filelen);
|
||||||
|
log_trace(os, container)("Path to %s is %s", filename, file);
|
||||||
|
fp = fopen(file, "r");
|
||||||
|
if (fp != NULL) {
|
||||||
|
int err = 0;
|
||||||
|
while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) {
|
||||||
|
found_match = false;
|
||||||
|
if (matchline == NULL) {
|
||||||
|
// single-line file case
|
||||||
|
int matched = sscanf(p, scan_fmt, returnval);
|
||||||
|
found_match = (matched == 1);
|
||||||
|
} else {
|
||||||
|
// multi-line file case
|
||||||
|
if (strstr(p, matchline) != NULL) {
|
||||||
|
// discard matchline string prefix
|
||||||
|
int matched = sscanf(p, scan_fmt, discard, returnval);
|
||||||
|
found_match = (matched == 2);
|
||||||
|
} else {
|
||||||
|
continue; // substring not found
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (found_match) {
|
||||||
|
fclose(fp);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
err = 1;
|
||||||
|
log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (err == 0) {
|
||||||
|
log_debug(os, container)("Empty file %s", file);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));
|
||||||
|
}
|
||||||
|
if (fp != NULL)
|
||||||
|
fclose(fp);
|
||||||
|
return OSCONTAINER_ERROR;
|
||||||
|
}
|
||||||
|
PRAGMA_DIAG_POP
|
||||||
|
|
||||||
|
// Declares `variable` of type `return_type` in the enclosing scope and fills
// it from `filename` via subsystem_file_line_contents() (no matchline, i.e.
// the single-line file form). If the read fails, the ENCLOSING function
// returns OSCONTAINER_ERROR cast to return_type; otherwise the value is
// traced using `logstring`.
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
  {                                                                       \
    const int read_status = subsystem_file_line_contents(subsystem,       \
                                                         filename,        \
                                                         NULL,            \
                                                         scan_fmt,        \
                                                         &variable);      \
    if (read_status != 0) {                                               \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
|
||||||
|
|
||||||
|
// String variant: declares `char variable[bufsize]` in the enclosing scope
// and fills it from `filename` via subsystem_file_line_contents() (no
// matchline). If the read fails, the ENCLOSING function returns NULL cast to
// return_type; otherwise the string is traced using `logstring`.
// NOTE(review): `bufsize` is not passed to the reader, so `scan_fmt` itself
// has to keep the conversion within the buffer - confirm at the call sites.
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                                logstring, scan_fmt, variable, bufsize)   \
  char variable[bufsize];                                                 \
  {                                                                       \
    const int read_status = subsystem_file_line_contents(subsystem,       \
                                                         filename,        \
                                                         NULL,            \
                                                         scan_fmt,        \
                                                         variable);       \
    if (read_status != 0) {                                               \
      return (return_type) NULL;                                          \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
|
||||||
|
|
||||||
|
// Multi-line variant: declares `variable` of type `return_type` in the
// enclosing scope and fills it from the line of `filename` that contains
// `matchline`, via subsystem_file_line_contents(). If the read fails, the
// ENCLOSING function returns OSCONTAINER_ERROR cast to return_type;
// otherwise the value is traced using `logstring`.
#define GET_CONTAINER_INFO_LINE(return_type, controller, filename,        \
                                matchline, logstring, scan_fmt, variable) \
  return_type variable;                                                   \
  {                                                                       \
    const int read_status = subsystem_file_line_contents(controller,      \
                                                         filename,        \
                                                         matchline,       \
                                                         scan_fmt,        \
                                                         &variable);      \
    if (read_status != 0) {                                               \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
|
||||||
|
|
||||||
|
// Four controllers: cpu, cpuset, cpuacct, memory
|
||||||
|
#define CG_INFO_LENGTH 4
|
||||||
|
|
||||||
|
// A single cached jlong metric together with the os::elapsed_counter() value
// after which it should be read again.
class CachedMetric : public CHeapObj<mtInternal>{
  private:
    volatile jlong _metric;
    volatile jlong _next_check_counter;
  public:
    // Starts with value -1 and a deadline of min_jlong, so the first
    // should_check_metric() call reports true.
    CachedMetric() : _metric(-1), _next_check_counter(min_jlong) {}

    // True once the elapsed counter has passed the stored deadline.
    bool should_check_metric() {
      return os::elapsed_counter() > _next_check_counter;
    }

    jlong value() { return _metric; }

    // Stores a new value and moves the deadline `timeout` counter ticks
    // into the future.
    void set_value(jlong value, jlong timeout) {
      _metric = value;
      // Metric is unlikely to change, but we want to remain
      // responsive to configuration changes. A very short grace time
      // between re-read avoids excessive overhead during startup without
      // significantly reducing the VMs ability to promptly react to changed
      // metric config
      _next_check_counter = os::elapsed_counter() + timeout;
    }
};
|
||||||
|
|
||||||
|
// Pairs a CgroupController with a CachedMetric allocated for it.
class CachingCgroupController : public CHeapObj<mtInternal> {
  private:
    CgroupController* _controller;
    CachedMetric* _metrics_cache;

  public:
    // Keeps the given controller pointer and allocates a fresh cache.
    CachingCgroupController(CgroupController* cont)
      : _controller(cont),
        _metrics_cache(new CachedMetric()) {}

    CachedMetric* metrics_cache() { return _metrics_cache; }
    CgroupController* controller() { return _controller; }
};
|
||||||
|
|
||||||
|
// Interface shared by the cgroup version 1 and version 2 subsystems.
//
// The two non-virtual functions are implemented once on top of the virtual
// ones. All virtual functions are pure: this base class supplies no
// implementation for them, and a non-pure virtual function without a
// definition is an ODR violation that can surface as an undefined reference
// at link time.
class CgroupSubsystem: public CHeapObj<mtInternal> {
  public:
    // Cached front end for read_memory_limit_in_bytes(); uses the cache of
    // memory_controller().
    jlong memory_limit_in_bytes();
    // Processor count derived from cpu_quota(), cpu_period() and
    // cpu_shares(); uses the cache of cpu_controller().
    int active_processor_count();

    // A quota of -1 or less, or a period of 0 or less, means "no quota" to
    // active_processor_count().
    virtual int cpu_quota() = 0;
    virtual int cpu_period() = 0;
    // Shares of -1 or less mean "no shares" to active_processor_count().
    virtual int cpu_shares() = 0;
    virtual jlong memory_usage_in_bytes() = 0;
    virtual jlong memory_and_swap_limit_in_bytes() = 0;
    virtual jlong memory_soft_limit_in_bytes() = 0;
    virtual jlong memory_max_usage_in_bytes() = 0;
    virtual char * cpu_cpuset_cpus() = 0;
    virtual char * cpu_cpuset_memory_nodes() = 0;
    // Uncached read of the memory limit.
    virtual jlong read_memory_limit_in_bytes() = 0;
    virtual const char * container_type() = 0;
    // Controllers whose metrics cache backs the two cached functions above.
    virtual CachingCgroupController* memory_controller() = 0;
    virtual CachingCgroupController* cpu_controller() = 0;
};
|
||||||
|
|
||||||
|
class CgroupSubsystemFactory: AllStatic {
|
||||||
|
public:
|
||||||
|
static CgroupSubsystem* create();
|
||||||
|
};
|
||||||
|
|
||||||
|
// Class representing info in /proc/self/cgroup.
|
||||||
|
// See man 7 cgroups
|
||||||
|
class CgroupInfo : public StackObj {
|
||||||
|
friend class CgroupSubsystemFactory;
|
||||||
|
|
||||||
|
private:
|
||||||
|
char* _name;
|
||||||
|
int _hierarchy_id;
|
||||||
|
bool _enabled;
|
||||||
|
char* _cgroup_path;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif // CGROUP_SUBSYSTEM_LINUX_HPP
|
243
src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Normal file
243
src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Normal file
|
@ -0,0 +1,243 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include "cgroupV1Subsystem_linux.hpp"
|
||||||
|
#include "logging/log.hpp"
|
||||||
|
#include "memory/allocation.hpp"
|
||||||
|
#include "runtime/globals.hpp"
|
||||||
|
#include "runtime/os.hpp"
|
||||||
|
#include "utilities/globalDefinitions.hpp"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set directory to subsystem specific files based
|
||||||
|
* on the contents of the mountinfo and cgroup files.
|
||||||
|
*/
|
||||||
|
void CgroupV1Controller::set_subsystem_path(char *cgroup_path) {
|
||||||
|
char buf[MAXPATHLEN+1];
|
||||||
|
if (_root != NULL && cgroup_path != NULL) {
|
||||||
|
if (strcmp(_root, "/") == 0) {
|
||||||
|
int buflen;
|
||||||
|
strncpy(buf, _mount_point, MAXPATHLEN);
|
||||||
|
buf[MAXPATHLEN-1] = '\0';
|
||||||
|
if (strcmp(cgroup_path,"/") != 0) {
|
||||||
|
buflen = strlen(buf);
|
||||||
|
if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
strncat(buf, cgroup_path, MAXPATHLEN-buflen);
|
||||||
|
buf[MAXPATHLEN-1] = '\0';
|
||||||
|
}
|
||||||
|
_path = os::strdup(buf);
|
||||||
|
} else {
|
||||||
|
if (strcmp(_root, cgroup_path) == 0) {
|
||||||
|
strncpy(buf, _mount_point, MAXPATHLEN);
|
||||||
|
buf[MAXPATHLEN-1] = '\0';
|
||||||
|
_path = os::strdup(buf);
|
||||||
|
} else {
|
||||||
|
char *p = strstr(cgroup_path, _root);
|
||||||
|
if (p != NULL && p == _root) {
|
||||||
|
if (strlen(cgroup_path) > strlen(_root)) {
|
||||||
|
int buflen;
|
||||||
|
strncpy(buf, _mount_point, MAXPATHLEN);
|
||||||
|
buf[MAXPATHLEN-1] = '\0';
|
||||||
|
buflen = strlen(buf);
|
||||||
|
if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen);
|
||||||
|
buf[MAXPATHLEN-1] = '\0';
|
||||||
|
_path = os::strdup(buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* uses_mem_hierarchy
|
||||||
|
*
|
||||||
|
* Return whether or not hierarchical cgroup accounting is being
|
||||||
|
* done.
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* A number > 0 if true, or
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
jlong CgroupV1MemoryController::uses_mem_hierarchy() {
|
||||||
|
GET_CONTAINER_INFO(jlong, this, "/memory.use_hierarchy",
|
||||||
|
"Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy);
|
||||||
|
return use_hierarchy;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CgroupV1MemoryController::set_subsystem_path(char *cgroup_path) {
|
||||||
|
CgroupV1Controller::set_subsystem_path(cgroup_path);
|
||||||
|
jlong hierarchy = uses_mem_hierarchy();
|
||||||
|
if (hierarchy > 0) {
|
||||||
|
set_hierarchical(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* read_memory_limit_in_bytes
 *
 * Read the memory limit from memory.limit_in_bytes. A value at or above
 * _unlimited_memory counts as unlimited; in that case, and if the memory
 * controller is hierarchical, hierarchical_memory_limit from memory.stat
 * is consulted as well.
 *
 * return:
 *    memory limit in bytes or
 *    -1 for unlimited
 */
jlong CgroupV1Subsystem::read_memory_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.limit_in_bytes",
                     "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);

  if (memlimit < _unlimited_memory) {
    return (jlong)memlimit;
  }

  log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
  CgroupV1MemoryController* mem_controller = reinterpret_cast<CgroupV1MemoryController*>(_memory->controller());
  if (mem_controller->is_hierarchical()) {
    const char* stat_key = "hierarchical_memory_limit";
    const char* scan_format = "%s " JULONG_FORMAT;
    GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", stat_key,
                            "Hierarchical Memory Limit is: " JULONG_FORMAT, scan_format, hier_memlimit)
    if (hier_memlimit < _unlimited_memory) {
      return (jlong)hier_memlimit;
    }
    log_trace(os, container)("Hierarchical Memory Limit is: Unlimited");
  }
  return (jlong)-1;
}
|
||||||
|
|
||||||
|
/* memory_and_swap_limit_in_bytes
 *
 * Read the memory and swap limit from memory.memsw.limit_in_bytes. A value
 * at or above _unlimited_memory counts as unlimited; in that case, and if
 * the memory controller is hierarchical, hierarchical_memsw_limit from
 * memory.stat is consulted as well.
 *
 * return:
 *    memory and swap limit in bytes or
 *    -1 for unlimited
 */
jlong CgroupV1Subsystem::memory_and_swap_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.memsw.limit_in_bytes",
                     "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);

  if (memswlimit < _unlimited_memory) {
    return (jlong)memswlimit;
  }

  log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited");
  CgroupV1MemoryController* mem_controller = reinterpret_cast<CgroupV1MemoryController*>(_memory->controller());
  if (mem_controller->is_hierarchical()) {
    const char* stat_key = "hierarchical_memsw_limit";
    const char* scan_format = "%s " JULONG_FORMAT;
    GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", stat_key,
                            "Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, scan_format, hier_memlimit)
    if (hier_memlimit < _unlimited_memory) {
      return (jlong)hier_memlimit;
    }
    log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited");
  }
  return (jlong)-1;
}
|
||||||
|
|
||||||
|
/* memory_soft_limit_in_bytes
 *
 * Read the soft memory limit from memory.soft_limit_in_bytes.
 *
 * return:
 *    soft limit in bytes or
 *    -1 if the value is at or above _unlimited_memory
 */
jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.soft_limit_in_bytes",
                     "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, soft_limit);
  if (soft_limit < _unlimited_memory) {
    return (jlong)soft_limit;
  }
  log_trace(os, container)("Memory Soft Limit is: Unlimited");
  return (jlong)-1;
}
|
||||||
|
|
||||||
|
/* memory_usage_in_bytes
|
||||||
|
*
|
||||||
|
* Return the amount of used memory for this process.
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* memory usage in bytes or
|
||||||
|
* -1 for unlimited
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
jlong CgroupV1Subsystem::memory_usage_in_bytes() {
|
||||||
|
GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.usage_in_bytes",
|
||||||
|
"Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);
|
||||||
|
return memusage;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* memory_max_usage_in_bytes
|
||||||
|
*
|
||||||
|
* Return the maximum amount of used memory for this process.
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* max memory usage in bytes or
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
jlong CgroupV1Subsystem::memory_max_usage_in_bytes() {
|
||||||
|
GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.max_usage_in_bytes",
|
||||||
|
"Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage);
|
||||||
|
return memmaxusage;
|
||||||
|
}
|
||||||
|
|
||||||
|
char * CgroupV1Subsystem::cpu_cpuset_cpus() {
|
||||||
|
GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.cpus",
|
||||||
|
"cpuset.cpus is: %s", "%1023s", cpus, 1024);
|
||||||
|
return os::strdup(cpus);
|
||||||
|
}
|
||||||
|
|
||||||
|
char * CgroupV1Subsystem::cpu_cpuset_memory_nodes() {
|
||||||
|
GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.mems",
|
||||||
|
"cpuset.mems is: %s", "%1023s", mems, 1024);
|
||||||
|
return os::strdup(mems);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cpu_quota
|
||||||
|
*
|
||||||
|
* Return the number of milliseconds per period
|
||||||
|
* process is guaranteed to run.
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* quota time in milliseconds
|
||||||
|
* -1 for no quota
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
int CgroupV1Subsystem::cpu_quota() {
|
||||||
|
GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_quota_us",
|
||||||
|
"CPU Quota is: %d", "%d", quota);
|
||||||
|
return quota;
|
||||||
|
}
|
||||||
|
|
||||||
|
int CgroupV1Subsystem::cpu_period() {
|
||||||
|
GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_period_us",
|
||||||
|
"CPU Period is: %d", "%d", period);
|
||||||
|
return period;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cpu_shares
|
||||||
|
*
|
||||||
|
* Return the amount of cpu shares available to the process
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* Share number (typically a number relative to 1024)
|
||||||
|
* (2048 typically expresses 2 CPUs worth of processing)
|
||||||
|
* -1 for no share setup
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
int CgroupV1Subsystem::cpu_shares() {
|
||||||
|
GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.shares",
|
||||||
|
"CPU Shares is: %d", "%d", shares);
|
||||||
|
// Convert 1024 to no shares setup
|
||||||
|
if (shares == 1024) return -1;
|
||||||
|
|
||||||
|
return shares;
|
||||||
|
}
|
118
src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
Normal file
118
src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CGROUP_V1_SUBSYSTEM_LINUX_HPP
|
||||||
|
#define CGROUP_V1_SUBSYSTEM_LINUX_HPP
|
||||||
|
|
||||||
|
#include "runtime/os.hpp"
|
||||||
|
#include "memory/allocation.hpp"
|
||||||
|
#include "cgroupSubsystem_linux.hpp"
|
||||||
|
|
||||||
|
// Cgroups version 1 specific implementation
|
||||||
|
|
||||||
|
class CgroupV1Controller: public CgroupController {
|
||||||
|
private:
|
||||||
|
/* mountinfo contents */
|
||||||
|
char *_root;
|
||||||
|
char *_mount_point;
|
||||||
|
|
||||||
|
/* Constructed subsystem directory */
|
||||||
|
char *_path;
|
||||||
|
|
||||||
|
public:
|
||||||
|
CgroupV1Controller(char *root, char *mountpoint) {
|
||||||
|
_root = os::strdup(root);
|
||||||
|
_mount_point = os::strdup(mountpoint);
|
||||||
|
_path = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void set_subsystem_path(char *cgroup_path);
|
||||||
|
char *subsystem_path() { return _path; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class CgroupV1MemoryController: public CgroupV1Controller {
|
||||||
|
|
||||||
|
public:
|
||||||
|
bool is_hierarchical() { return _uses_mem_hierarchy; }
|
||||||
|
void set_subsystem_path(char *cgroup_path);
|
||||||
|
private:
|
||||||
|
/* Some container runtimes set limits via cgroup
|
||||||
|
* hierarchy. If set to true consider also memory.stat
|
||||||
|
* file if everything else seems unlimited */
|
||||||
|
bool _uses_mem_hierarchy;
|
||||||
|
jlong uses_mem_hierarchy();
|
||||||
|
void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }
|
||||||
|
|
||||||
|
public:
|
||||||
|
CgroupV1MemoryController(char *root, char *mountpoint) : CgroupV1Controller(root, mountpoint) {
|
||||||
|
_uses_mem_hierarchy = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
class CgroupV1Subsystem: public CgroupSubsystem {
|
||||||
|
|
||||||
|
public:
|
||||||
|
jlong read_memory_limit_in_bytes();
|
||||||
|
jlong memory_and_swap_limit_in_bytes();
|
||||||
|
jlong memory_soft_limit_in_bytes();
|
||||||
|
jlong memory_usage_in_bytes();
|
||||||
|
jlong memory_max_usage_in_bytes();
|
||||||
|
char * cpu_cpuset_cpus();
|
||||||
|
char * cpu_cpuset_memory_nodes();
|
||||||
|
|
||||||
|
int cpu_quota();
|
||||||
|
int cpu_period();
|
||||||
|
|
||||||
|
int cpu_shares();
|
||||||
|
|
||||||
|
const char * container_type() {
|
||||||
|
return "cgroupv1";
|
||||||
|
}
|
||||||
|
CachingCgroupController * memory_controller() { return _memory; }
|
||||||
|
CachingCgroupController * cpu_controller() { return _cpu; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
julong _unlimited_memory;
|
||||||
|
|
||||||
|
/* controllers */
|
||||||
|
CachingCgroupController* _memory = NULL;
|
||||||
|
CgroupV1Controller* _cpuset = NULL;
|
||||||
|
CachingCgroupController* _cpu = NULL;
|
||||||
|
CgroupV1Controller* _cpuacct = NULL;
|
||||||
|
|
||||||
|
public:
|
||||||
|
CgroupV1Subsystem(CgroupV1Controller* cpuset,
|
||||||
|
CgroupV1Controller* cpu,
|
||||||
|
CgroupV1Controller* cpuacct,
|
||||||
|
CgroupV1MemoryController* memory) {
|
||||||
|
_cpuset = cpuset;
|
||||||
|
_cpu = new CachingCgroupController(cpu);
|
||||||
|
_cpuacct = cpuacct;
|
||||||
|
_memory = new CachingCgroupController(memory);
|
||||||
|
_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // CGROUP_V1_SUBSYSTEM_LINUX_HPP
|
235
src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
Normal file
235
src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
Normal file
|
@ -0,0 +1,235 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2020, Red Hat Inc.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cgroupV2Subsystem_linux.hpp"
|
||||||
|
|
||||||
|
/* cpu_shares
|
||||||
|
*
|
||||||
|
* Return the amount of cpu shares available to the process
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* Share number (typically a number relative to 1024)
|
||||||
|
* (2048 typically expresses 2 CPUs worth of processing)
|
||||||
|
* -1 for no share setup
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
int CgroupV2Subsystem::cpu_shares() {
|
||||||
|
GET_CONTAINER_INFO(int, _unified, "/cpu.weight",
|
||||||
|
"Raw value for CPU shares is: %d", "%d", shares);
|
||||||
|
// Convert default value of 100 to no shares setup
|
||||||
|
if (shares == 100) {
|
||||||
|
log_debug(os, container)("CPU Shares is: %d", -1);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPU shares (OCI) value needs to get translated into
|
||||||
|
// a proper Cgroups v2 value. See:
|
||||||
|
// https://github.com/containers/crun/blob/master/crun.1.md#cpu-controller
|
||||||
|
//
|
||||||
|
// Use the inverse of (x == OCI value, y == cgroupsv2 value):
|
||||||
|
// ((262142 * y - 1)/9999) + 2 = x
|
||||||
|
//
|
||||||
|
int x = 262142 * shares - 1;
|
||||||
|
double frac = x/9999.0;
|
||||||
|
x = ((int)frac) + 2;
|
||||||
|
log_trace(os, container)("Scaled CPU shares value is: %d", x);
|
||||||
|
// Since the scaled value is not precise, return the closest
|
||||||
|
// multiple of PER_CPU_SHARES for a more conservative mapping
|
||||||
|
if ( x <= PER_CPU_SHARES ) {
|
||||||
|
// will always map to 1 CPU
|
||||||
|
log_debug(os, container)("CPU Shares is: %d", x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
int f = x/PER_CPU_SHARES;
|
||||||
|
int lower_multiple = f * PER_CPU_SHARES;
|
||||||
|
int upper_multiple = (f + 1) * PER_CPU_SHARES;
|
||||||
|
int distance_lower = MAX2(lower_multiple, x) - MIN2(lower_multiple, x);
|
||||||
|
int distance_upper = MAX2(upper_multiple, x) - MIN2(upper_multiple, x);
|
||||||
|
x = distance_lower <= distance_upper ? lower_multiple : upper_multiple;
|
||||||
|
log_trace(os, container)("Closest multiple of %d of the CPU Shares value is: %d", PER_CPU_SHARES, x);
|
||||||
|
log_debug(os, container)("CPU Shares is: %d", x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cpu_quota
|
||||||
|
*
|
||||||
|
* Return the number of milliseconds per period
|
||||||
|
* process is guaranteed to run.
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* quota time in milliseconds
|
||||||
|
* -1 for no quota
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
int CgroupV2Subsystem::cpu_quota() {
|
||||||
|
char * cpu_quota_str = cpu_quota_val();
|
||||||
|
int limit = (int)limit_from_str(cpu_quota_str);
|
||||||
|
log_trace(os, container)("CPU Quota is: %d", limit);
|
||||||
|
return limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
char * CgroupV2Subsystem::cpu_cpuset_cpus() {
|
||||||
|
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.cpus",
|
||||||
|
"cpuset.cpus is: %s", "%1023s", cpus, 1024);
|
||||||
|
if (cpus == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return os::strdup(cpus);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* CgroupV2Subsystem::cpu_quota_val() {
|
||||||
|
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpu.max",
|
||||||
|
"Raw value for CPU quota is: %s", "%s %*d", quota, 1024);
|
||||||
|
if (quota == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return os::strdup(quota);
|
||||||
|
}
|
||||||
|
|
||||||
|
char * CgroupV2Subsystem::cpu_cpuset_memory_nodes() {
|
||||||
|
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.mems",
|
||||||
|
"cpuset.mems is: %s", "%1023s", mems, 1024);
|
||||||
|
if (mems == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return os::strdup(mems);
|
||||||
|
}
|
||||||
|
|
||||||
|
int CgroupV2Subsystem::cpu_period() {
|
||||||
|
GET_CONTAINER_INFO(int, _unified, "/cpu.max",
|
||||||
|
"CPU Period is: %d", "%*s %d", period);
|
||||||
|
return period;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* memory_usage_in_bytes
|
||||||
|
*
|
||||||
|
* Return the amount of used memory used by this cgroup and decendents
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* memory usage in bytes or
|
||||||
|
* -1 for unlimited
|
||||||
|
* OSCONTAINER_ERROR for not supported
|
||||||
|
*/
|
||||||
|
jlong CgroupV2Subsystem::memory_usage_in_bytes() {
|
||||||
|
GET_CONTAINER_INFO(jlong, _unified, "/memory.current",
|
||||||
|
"Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);
|
||||||
|
return memusage;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* memory_soft_limit_in_bytes
 *
 * Return the soft memory limit (memory.high) converted by limit_from_str():
 * the limit in bytes, -1 for "max", or OSCONTAINER_ERROR.
 */
jlong CgroupV2Subsystem::memory_soft_limit_in_bytes() {
  // limit_from_str() releases the string handed to it.
  return limit_from_str(mem_soft_limit_val());
}
|
||||||
|
|
||||||
|
/* memory_max_usage_in_bytes
 *
 * No value is read by this implementation; it always reports
 * OSCONTAINER_ERROR.
 */
jlong CgroupV2Subsystem::memory_max_usage_in_bytes() {
  // Log this string at trace level so as to make tests happy.
  log_trace(os, container)("Maximum Memory Usage is not supported.");
  // not supported
  return OSCONTAINER_ERROR;
}
|
||||||
|
|
||||||
|
char* CgroupV2Subsystem::mem_soft_limit_val() {
|
||||||
|
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.high",
|
||||||
|
"Memory Soft Limit is: %s", "%s", mem_soft_limit_str, 1024);
|
||||||
|
if (mem_soft_limit_str == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return os::strdup(mem_soft_limit_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* memory_and_swap_limit_in_bytes
 *
 * Return the combined memory and swap limit.
 *
 * In cgroups v2 memory.swap.max limits swap usage only (the v1 file
 * memory.memsw.limit_in_bytes covers memory plus swap), so the memory
 * limit has to be added to obtain the combined value.
 *
 * return:
 *    memory and swap limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR for not supported
 */
jlong CgroupV2Subsystem::memory_and_swap_limit_in_bytes() {
  char* mem_swp_limit_str = mem_swp_limit_val();
  // limit_from_str() releases the string handed to it.
  jlong swap_limit = limit_from_str(mem_swp_limit_str);
  if (swap_limit < 0) {
    // Unlimited swap (-1) or a read/parse error: pass through unchanged.
    return swap_limit;
  }
  jlong memory_limit = read_memory_limit_in_bytes();
  if (memory_limit < 0) {
    // No usable memory limit: the sum would be meaningless, so report
    // the memory result (unlimited or error) instead.
    return memory_limit;
  }
  return memory_limit + swap_limit;
}
|
||||||
|
|
||||||
|
char* CgroupV2Subsystem::mem_swp_limit_val() {
|
||||||
|
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.swap.max",
|
||||||
|
"Memory and Swap Limit is: %s", "%s", mem_swp_limit_str, 1024);
|
||||||
|
if (mem_swp_limit_str == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return os::strdup(mem_swp_limit_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* memory_limit_in_bytes
|
||||||
|
*
|
||||||
|
* Return the limit of available memory for this process.
|
||||||
|
*
|
||||||
|
* return:
|
||||||
|
* memory limit in bytes or
|
||||||
|
* -1 for unlimited, OSCONTAINER_ERROR for an error
|
||||||
|
*/
|
||||||
|
jlong CgroupV2Subsystem::read_memory_limit_in_bytes() {
|
||||||
|
char * mem_limit_str = mem_limit_val();
|
||||||
|
jlong limit = limit_from_str(mem_limit_str);
|
||||||
|
if (log_is_enabled(Trace, os, container)) {
|
||||||
|
if (limit == -1) {
|
||||||
|
log_trace(os, container)("Memory Limit is: Unlimited");
|
||||||
|
} else {
|
||||||
|
log_trace(os, container)("Memory Limit is: " JLONG_FORMAT, limit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* limit_from_str
 *
 * Convert the textual limit limit_str into a number and release
 * limit_str with os::free().
 *
 * return:
 *    the parsed limit or
 *    -1 if limit_str is the literal "max" (unlimited)
 *    OSCONTAINER_ERROR if limit_str is NULL or not a number
 */
jlong CgroupV2Subsystem::limit_from_str(char* limit_str) {
  if (limit_str == NULL) {
    return OSCONTAINER_ERROR;
  }

  jlong result;
  julong parsed;
  if (strcmp("max", limit_str) == 0) {
    // Unlimited memory in Cgroups V2 is the literal string 'max'
    result = (jlong)-1;
  } else if (sscanf(limit_str, JULONG_FORMAT, &parsed) != 1) {
    result = OSCONTAINER_ERROR;
  } else {
    result = (jlong)parsed;
  }
  // Every path past the NULL check gives the string back exactly once.
  os::free(limit_str);
  return result;
}
|
||||||
|
|
||||||
|
char* CgroupV2Subsystem::mem_limit_val() {
|
||||||
|
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max",
|
||||||
|
"Raw value for memory limit is: %s", "%s", mem_limit_str, 1024);
|
||||||
|
if (mem_limit_str == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return os::strdup(mem_limit_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) {
|
||||||
|
char buf[MAXPATHLEN+1];
|
||||||
|
int buflen;
|
||||||
|
strncpy(buf, mount_path, MAXPATHLEN);
|
||||||
|
buf[MAXPATHLEN] = '\0';
|
||||||
|
buflen = strlen(buf);
|
||||||
|
if ((buflen + strlen(cgroup_path)) > MAXPATHLEN) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
strncat(buf, cgroup_path, MAXPATHLEN-buflen);
|
||||||
|
buf[MAXPATHLEN] = '\0';
|
||||||
|
return os::strdup(buf);
|
||||||
|
}
|
||||||
|
|
89
src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp
Normal file
89
src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2020, Red Hat Inc.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CGROUP_V2_SUBSYSTEM_LINUX_HPP
|
||||||
|
#define CGROUP_V2_SUBSYSTEM_LINUX_HPP
|
||||||
|
|
||||||
|
#include "cgroupSubsystem_linux.hpp"
|
||||||
|
|
||||||
|
class CgroupV2Controller: public CgroupController {
|
||||||
|
private:
|
||||||
|
/* the mount path of the cgroup v2 hierarchy */
|
||||||
|
char *_mount_path;
|
||||||
|
/* The cgroup path for the controller */
|
||||||
|
char *_cgroup_path;
|
||||||
|
|
||||||
|
/* Constructed full path to the subsystem directory */
|
||||||
|
char *_path;
|
||||||
|
static char* construct_path(char* mount_path, char *cgroup_path);
|
||||||
|
|
||||||
|
public:
|
||||||
|
CgroupV2Controller(char * mount_path, char *cgroup_path) {
|
||||||
|
_mount_path = mount_path;
|
||||||
|
_cgroup_path = os::strdup(cgroup_path);
|
||||||
|
_path = construct_path(mount_path, cgroup_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
char *subsystem_path() { return _path; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class CgroupV2Subsystem: public CgroupSubsystem {
|
||||||
|
private:
|
||||||
|
/* One unified controller */
|
||||||
|
CgroupController* _unified = NULL;
|
||||||
|
/* Caching wrappers for cpu/memory metrics */
|
||||||
|
CachingCgroupController* _memory = NULL;
|
||||||
|
CachingCgroupController* _cpu = NULL;
|
||||||
|
|
||||||
|
char *mem_limit_val();
|
||||||
|
char *mem_swp_limit_val();
|
||||||
|
char *mem_soft_limit_val();
|
||||||
|
char *cpu_quota_val();
|
||||||
|
jlong limit_from_str(char* limit_str);
|
||||||
|
|
||||||
|
public:
|
||||||
|
CgroupV2Subsystem(CgroupController * unified) {
|
||||||
|
_unified = unified;
|
||||||
|
_memory = new CachingCgroupController(unified);
|
||||||
|
_cpu = new CachingCgroupController(unified);
|
||||||
|
}
|
||||||
|
|
||||||
|
jlong read_memory_limit_in_bytes();
|
||||||
|
int cpu_quota();
|
||||||
|
int cpu_period();
|
||||||
|
int cpu_shares();
|
||||||
|
jlong memory_and_swap_limit_in_bytes();
|
||||||
|
jlong memory_soft_limit_in_bytes();
|
||||||
|
jlong memory_usage_in_bytes();
|
||||||
|
jlong memory_max_usage_in_bytes();
|
||||||
|
char * cpu_cpuset_cpus();
|
||||||
|
char * cpu_cpuset_memory_nodes();
|
||||||
|
const char * container_type() {
|
||||||
|
return "cgroupv2";
|
||||||
|
}
|
||||||
|
CachingCgroupController * memory_controller() { return _memory; }
|
||||||
|
CachingCgroupController * cpu_controller() { return _cpu; }
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // CGROUP_V2_SUBSYSTEM_LINUX_HPP
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -25,275 +25,16 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include "utilities/globalDefinitions.hpp"
|
|
||||||
#include "memory/allocation.hpp"
|
|
||||||
#include "runtime/globals.hpp"
|
#include "runtime/globals.hpp"
|
||||||
#include "runtime/os.hpp"
|
#include "runtime/os.hpp"
|
||||||
#include "logging/log.hpp"
|
#include "logging/log.hpp"
|
||||||
#include "osContainer_linux.hpp"
|
#include "osContainer_linux.hpp"
|
||||||
|
#include "cgroupSubsystem_linux.hpp"
|
||||||
|
|
||||||
/*
|
|
||||||
* PER_CPU_SHARES has been set to 1024 because CPU shares' quota
|
|
||||||
* is commonly used in cloud frameworks like Kubernetes[1],
|
|
||||||
* AWS[2] and Mesos[3] in a similar way. They spawn containers with
|
|
||||||
* --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
|
|
||||||
* the inverse for determining the number of possible available
|
|
||||||
* CPUs to the JVM inside a container. See JDK-8216366.
|
|
||||||
*
|
|
||||||
* [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
|
|
||||||
* In particular:
|
|
||||||
* When using Docker:
|
|
||||||
* The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
|
|
||||||
* fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
|
|
||||||
* --cpu-shares flag in the docker run command.
|
|
||||||
* [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
|
|
||||||
* [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
|
|
||||||
* https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
|
|
||||||
*/
|
|
||||||
#define PER_CPU_SHARES 1024
|
|
||||||
|
|
||||||
bool OSContainer::_is_initialized = false;
|
bool OSContainer::_is_initialized = false;
|
||||||
bool OSContainer::_is_containerized = false;
|
bool OSContainer::_is_containerized = false;
|
||||||
int OSContainer::_active_processor_count = 1;
|
CgroupSubsystem* cgroup_subsystem;
|
||||||
julong _unlimited_memory;
|
|
||||||
|
|
||||||
class CgroupSubsystem: CHeapObj<mtInternal> {
|
|
||||||
friend class OSContainer;
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
|
||||||
volatile jlong _next_check_counter;
|
|
||||||
|
|
||||||
/* mountinfo contents */
|
|
||||||
char *_root;
|
|
||||||
char *_mount_point;
|
|
||||||
|
|
||||||
/* Constructed subsystem directory */
|
|
||||||
char *_path;
|
|
||||||
|
|
||||||
public:
|
|
||||||
CgroupSubsystem(char *root, char *mountpoint) {
|
|
||||||
_root = os::strdup(root);
|
|
||||||
_mount_point = os::strdup(mountpoint);
|
|
||||||
_path = NULL;
|
|
||||||
_next_check_counter = min_jlong;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set directory to subsystem specific files based
|
|
||||||
* on the contents of the mountinfo and cgroup files.
|
|
||||||
*/
|
|
||||||
void set_subsystem_path(char *cgroup_path) {
|
|
||||||
char buf[MAXPATHLEN+1];
|
|
||||||
if (_root != NULL && cgroup_path != NULL) {
|
|
||||||
if (strcmp(_root, "/") == 0) {
|
|
||||||
int buflen;
|
|
||||||
strncpy(buf, _mount_point, MAXPATHLEN);
|
|
||||||
buf[MAXPATHLEN-1] = '\0';
|
|
||||||
if (strcmp(cgroup_path,"/") != 0) {
|
|
||||||
buflen = strlen(buf);
|
|
||||||
if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
strncat(buf, cgroup_path, MAXPATHLEN-buflen);
|
|
||||||
buf[MAXPATHLEN-1] = '\0';
|
|
||||||
}
|
|
||||||
_path = os::strdup(buf);
|
|
||||||
} else {
|
|
||||||
if (strcmp(_root, cgroup_path) == 0) {
|
|
||||||
strncpy(buf, _mount_point, MAXPATHLEN);
|
|
||||||
buf[MAXPATHLEN-1] = '\0';
|
|
||||||
_path = os::strdup(buf);
|
|
||||||
} else {
|
|
||||||
char *p = strstr(cgroup_path, _root);
|
|
||||||
if (p != NULL && p == _root) {
|
|
||||||
if (strlen(cgroup_path) > strlen(_root)) {
|
|
||||||
int buflen;
|
|
||||||
strncpy(buf, _mount_point, MAXPATHLEN);
|
|
||||||
buf[MAXPATHLEN-1] = '\0';
|
|
||||||
buflen = strlen(buf);
|
|
||||||
if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen);
|
|
||||||
buf[MAXPATHLEN-1] = '\0';
|
|
||||||
_path = os::strdup(buf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
char *subsystem_path() { return _path; }
|
|
||||||
|
|
||||||
bool cache_has_expired() {
|
|
||||||
return os::elapsed_counter() > _next_check_counter;
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_cache_expiry_time(jlong timeout) {
|
|
||||||
_next_check_counter = os::elapsed_counter() + timeout;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
class CgroupMemorySubsystem: CgroupSubsystem {
|
|
||||||
friend class OSContainer;
|
|
||||||
|
|
||||||
private:
|
|
||||||
/* Some container runtimes set limits via cgroup
|
|
||||||
* hierarchy. If set to true consider also memory.stat
|
|
||||||
* file if everything else seems unlimited */
|
|
||||||
bool _uses_mem_hierarchy;
|
|
||||||
volatile jlong _memory_limit_in_bytes;
|
|
||||||
|
|
||||||
public:
|
|
||||||
CgroupMemorySubsystem(char *root, char *mountpoint) : CgroupSubsystem::CgroupSubsystem(root, mountpoint) {
|
|
||||||
_uses_mem_hierarchy = false;
|
|
||||||
_memory_limit_in_bytes = -1;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_hierarchical() { return _uses_mem_hierarchy; }
|
|
||||||
void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }
|
|
||||||
|
|
||||||
jlong memory_limit_in_bytes() { return _memory_limit_in_bytes; }
|
|
||||||
void set_memory_limit_in_bytes(jlong value) {
|
|
||||||
_memory_limit_in_bytes = value;
|
|
||||||
// max memory limit is unlikely to change, but we want to remain
|
|
||||||
// responsive to configuration changes. A very short grace time
|
|
||||||
// between re-read avoids excessive overhead during startup without
|
|
||||||
// significantly reducing the VMs ability to promptly react to reduced
|
|
||||||
// memory availability
|
|
||||||
set_cache_expiry_time(OSCONTAINER_CACHE_TIMEOUT);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
CgroupMemorySubsystem* memory = NULL;
|
|
||||||
CgroupSubsystem* cpuset = NULL;
|
|
||||||
CgroupSubsystem* cpu = NULL;
|
|
||||||
CgroupSubsystem* cpuacct = NULL;
|
|
||||||
|
|
||||||
typedef char * cptr;
|
|
||||||
|
|
||||||
PRAGMA_DIAG_PUSH
|
|
||||||
PRAGMA_FORMAT_NONLITERAL_IGNORED
|
|
||||||
template <typename T> int subsystem_file_line_contents(CgroupSubsystem* c,
|
|
||||||
const char *filename,
|
|
||||||
const char *matchline,
|
|
||||||
const char *scan_fmt,
|
|
||||||
T returnval) {
|
|
||||||
FILE *fp = NULL;
|
|
||||||
char *p;
|
|
||||||
char file[MAXPATHLEN+1];
|
|
||||||
char buf[MAXPATHLEN+1];
|
|
||||||
char discard[MAXPATHLEN+1];
|
|
||||||
bool found_match = false;
|
|
||||||
|
|
||||||
if (c == NULL) {
|
|
||||||
log_debug(os, container)("subsystem_file_line_contents: CgroupSubsytem* is NULL");
|
|
||||||
return OSCONTAINER_ERROR;
|
|
||||||
}
|
|
||||||
if (c->subsystem_path() == NULL) {
|
|
||||||
log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");
|
|
||||||
return OSCONTAINER_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
strncpy(file, c->subsystem_path(), MAXPATHLEN);
|
|
||||||
file[MAXPATHLEN-1] = '\0';
|
|
||||||
int filelen = strlen(file);
|
|
||||||
if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
|
|
||||||
log_debug(os, container)("File path too long %s, %s", file, filename);
|
|
||||||
return OSCONTAINER_ERROR;
|
|
||||||
}
|
|
||||||
strncat(file, filename, MAXPATHLEN-filelen);
|
|
||||||
log_trace(os, container)("Path to %s is %s", filename, file);
|
|
||||||
fp = fopen(file, "r");
|
|
||||||
if (fp != NULL) {
|
|
||||||
int err = 0;
|
|
||||||
while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) {
|
|
||||||
found_match = false;
|
|
||||||
if (matchline == NULL) {
|
|
||||||
// single-line file case
|
|
||||||
int matched = sscanf(p, scan_fmt, returnval);
|
|
||||||
found_match = (matched == 1);
|
|
||||||
} else {
|
|
||||||
// multi-line file case
|
|
||||||
if (strstr(p, matchline) != NULL) {
|
|
||||||
// discard matchline string prefix
|
|
||||||
int matched = sscanf(p, scan_fmt, discard, returnval);
|
|
||||||
found_match = (matched == 2);
|
|
||||||
} else {
|
|
||||||
continue; // substring not found
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (found_match) {
|
|
||||||
fclose(fp);
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
err = 1;
|
|
||||||
log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (err == 0) {
|
|
||||||
log_debug(os, container)("Empty file %s", file);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));
|
|
||||||
}
|
|
||||||
if (fp != NULL)
|
|
||||||
fclose(fp);
|
|
||||||
return OSCONTAINER_ERROR;
|
|
||||||
}
|
|
||||||
PRAGMA_DIAG_POP
|
|
||||||
|
|
||||||
/* Declares 'return_type variable;' in the enclosing scope and fills it from
 * the single-value file 'filename' of 'subsystem' using 'scan_fmt'.
 * If the read fails, the ENCLOSING function returns
 * (return_type) OSCONTAINER_ERROR. On success the value is trace-logged
 * with 'logstring'. */
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
{                                                                         \
  int err = subsystem_file_line_contents(subsystem, filename, NULL,       \
                                         scan_fmt, &variable);            \
  if (err != 0) {                                                         \
    return (return_type) OSCONTAINER_ERROR;                               \
  }                                                                       \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}
|
|
||||||
|
|
||||||
/* String flavour of GET_CONTAINER_INFO: declares 'char variable[bufsize];'
 * in the enclosing scope and scans the single-value file 'filename' of
 * 'subsystem' into it using 'scan_fmt'. If the read fails, the ENCLOSING
 * function returns (return_type) NULL. On success the text is trace-logged
 * with 'logstring'. */
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                                logstring, scan_fmt, variable, bufsize)   \
  char variable[bufsize];                                                 \
{                                                                         \
  int err = subsystem_file_line_contents(subsystem, filename, NULL,       \
                                         scan_fmt, variable);             \
  if (err != 0) {                                                         \
    return (return_type) NULL;                                            \
  }                                                                       \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}
|
|
||||||
|
|
||||||
/* Keyed flavour of GET_CONTAINER_INFO: declares 'return_type variable;' in
 * the enclosing scope and fills it from the line of 'filename' that contains
 * 'matchline', scanned with 'scan_fmt'. If the read fails, the ENCLOSING
 * function returns (return_type) OSCONTAINER_ERROR. On success the value is
 * trace-logged with 'logstring'. */
#define GET_CONTAINER_INFO_LINE(return_type, subsystem, filename,         \
                                matchline, logstring, scan_fmt, variable) \
  return_type variable;                                                   \
{                                                                         \
  int err = subsystem_file_line_contents(subsystem, filename, matchline,  \
                                         scan_fmt, &variable);            \
  if (err != 0) {                                                         \
    return (return_type) OSCONTAINER_ERROR;                               \
  }                                                                       \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}
|
|
||||||
|
|
||||||
/* init
|
/* init
|
||||||
*
|
*
|
||||||
|
@ -301,12 +42,6 @@ PRAGMA_DIAG_POP
|
||||||
* we are running under cgroup control.
|
* we are running under cgroup control.
|
||||||
*/
|
*/
|
||||||
void OSContainer::init() {
|
void OSContainer::init() {
|
||||||
FILE *mntinfo = NULL;
|
|
||||||
FILE *cgroup = NULL;
|
|
||||||
char buf[MAXPATHLEN+1];
|
|
||||||
char tmproot[MAXPATHLEN+1];
|
|
||||||
char tmpmount[MAXPATHLEN+1];
|
|
||||||
char *p;
|
|
||||||
jlong mem_limit;
|
jlong mem_limit;
|
||||||
|
|
||||||
assert(!_is_initialized, "Initializing OSContainer more than once");
|
assert(!_is_initialized, "Initializing OSContainer more than once");
|
||||||
|
@ -314,139 +49,19 @@ void OSContainer::init() {
|
||||||
_is_initialized = true;
|
_is_initialized = true;
|
||||||
_is_containerized = false;
|
_is_containerized = false;
|
||||||
|
|
||||||
_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
|
|
||||||
|
|
||||||
log_trace(os, container)("OSContainer::init: Initializing Container Support");
|
log_trace(os, container)("OSContainer::init: Initializing Container Support");
|
||||||
if (!UseContainerSupport) {
|
if (!UseContainerSupport) {
|
||||||
log_trace(os, container)("Container Support not enabled");
|
log_trace(os, container)("Container Support not enabled");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
cgroup_subsystem = CgroupSubsystemFactory::create();
|
||||||
* Find the cgroup mount point for memory and cpuset
|
if (cgroup_subsystem == NULL) {
|
||||||
* by reading /proc/self/mountinfo
|
return; // Required subsystem files not found or other error
|
||||||
*
|
|
||||||
* Example for docker:
|
|
||||||
* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
|
|
||||||
*
|
|
||||||
* Example for host:
|
|
||||||
* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
|
|
||||||
*/
|
|
||||||
mntinfo = fopen("/proc/self/mountinfo", "r");
|
|
||||||
if (mntinfo == NULL) {
|
|
||||||
log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
|
|
||||||
os::strerror(errno));
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
|
|
||||||
char tmpcgroups[MAXPATHLEN+1];
|
|
||||||
char *cptr = tmpcgroups;
|
|
||||||
char *token;
|
|
||||||
|
|
||||||
// mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
|
|
||||||
if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
while ((token = strsep(&cptr, ",")) != NULL) {
|
|
||||||
if (strcmp(token, "memory") == 0) {
|
|
||||||
memory = new CgroupMemorySubsystem(tmproot, tmpmount);
|
|
||||||
} else if (strcmp(token, "cpuset") == 0) {
|
|
||||||
cpuset = new CgroupSubsystem(tmproot, tmpmount);
|
|
||||||
} else if (strcmp(token, "cpu") == 0) {
|
|
||||||
cpu = new CgroupSubsystem(tmproot, tmpmount);
|
|
||||||
} else if (strcmp(token, "cpuacct") == 0) {
|
|
||||||
cpuacct= new CgroupSubsystem(tmproot, tmpmount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(mntinfo);
|
|
||||||
|
|
||||||
if (memory == NULL) {
|
|
||||||
log_debug(os, container)("Required cgroup memory subsystem not found");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (cpuset == NULL) {
|
|
||||||
log_debug(os, container)("Required cgroup cpuset subsystem not found");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (cpu == NULL) {
|
|
||||||
log_debug(os, container)("Required cgroup cpu subsystem not found");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (cpuacct == NULL) {
|
|
||||||
log_debug(os, container)("Required cgroup cpuacct subsystem not found");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Read /proc/self/cgroup and map host mount point to
|
|
||||||
* local one via /proc/self/mountinfo content above
|
|
||||||
*
|
|
||||||
* Docker example:
|
|
||||||
* 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
|
|
||||||
*
|
|
||||||
* Host example:
|
|
||||||
* 5:memory:/user.slice
|
|
||||||
*
|
|
||||||
* Construct a path to the process specific memory and cpuset
|
|
||||||
* cgroup directory.
|
|
||||||
*
|
|
||||||
* For a container running under Docker from memory example above
|
|
||||||
* the paths would be:
|
|
||||||
*
|
|
||||||
* /sys/fs/cgroup/memory
|
|
||||||
*
|
|
||||||
* For a Host from memory example above the path would be:
|
|
||||||
*
|
|
||||||
* /sys/fs/cgroup/memory/user.slice
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
cgroup = fopen("/proc/self/cgroup", "r");
|
|
||||||
if (cgroup == NULL) {
|
|
||||||
log_debug(os, container)("Can't open /proc/self/cgroup, %s",
|
|
||||||
os::strerror(errno));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
|
|
||||||
char *controllers;
|
|
||||||
char *token;
|
|
||||||
char *base;
|
|
||||||
|
|
||||||
/* Skip cgroup number */
|
|
||||||
strsep(&p, ":");
|
|
||||||
/* Get controllers and base */
|
|
||||||
controllers = strsep(&p, ":");
|
|
||||||
base = strsep(&p, "\n");
|
|
||||||
|
|
||||||
if (controllers == NULL) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((token = strsep(&controllers, ",")) != NULL) {
|
|
||||||
if (strcmp(token, "memory") == 0) {
|
|
||||||
memory->set_subsystem_path(base);
|
|
||||||
jlong hierarchy = uses_mem_hierarchy();
|
|
||||||
if (hierarchy > 0) {
|
|
||||||
memory->set_hierarchical(true);
|
|
||||||
}
|
|
||||||
} else if (strcmp(token, "cpuset") == 0) {
|
|
||||||
cpuset->set_subsystem_path(base);
|
|
||||||
} else if (strcmp(token, "cpu") == 0) {
|
|
||||||
cpu->set_subsystem_path(base);
|
|
||||||
} else if (strcmp(token, "cpuacct") == 0) {
|
|
||||||
cpuacct->set_subsystem_path(base);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(cgroup);
|
|
||||||
|
|
||||||
// We need to update the amount of physical memory now that
|
// We need to update the amount of physical memory now that
|
||||||
// command line arguments have been processed.
|
// cgroup subsystem files have been processed.
|
||||||
if ((mem_limit = memory_limit_in_bytes()) > 0) {
|
if ((mem_limit = cgroup_subsystem->memory_limit_in_bytes()) > 0) {
|
||||||
os::Linux::set_physical_memory(mem_limit);
|
os::Linux::set_physical_memory(mem_limit);
|
||||||
log_info(os, container)("Memory Limit is: " JLONG_FORMAT, mem_limit);
|
log_info(os, container)("Memory Limit is: " JLONG_FORMAT, mem_limit);
|
||||||
}
|
}
|
||||||
|
@ -456,272 +71,61 @@ void OSContainer::init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * OSContainer::container_type() {
|
const char * OSContainer::container_type() {
|
||||||
if (is_containerized()) {
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
return "cgroupv1";
|
return cgroup_subsystem->container_type();
|
||||||
} else {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* uses_mem_hierarchy
|
|
||||||
*
|
|
||||||
* Return whether or not hierarchical cgroup accounting is being
|
|
||||||
* done.
|
|
||||||
*
|
|
||||||
* return:
|
|
||||||
* A number > 0 if true, or
|
|
||||||
* OSCONTAINER_ERROR for not supported
|
|
||||||
*/
|
|
||||||
jlong OSContainer::uses_mem_hierarchy() {
|
|
||||||
GET_CONTAINER_INFO(jlong, memory, "/memory.use_hierarchy",
|
|
||||||
"Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy);
|
|
||||||
return use_hierarchy;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* memory_limit_in_bytes
|
|
||||||
*
|
|
||||||
* Return the limit of available memory for this process.
|
|
||||||
*
|
|
||||||
* return:
|
|
||||||
* memory limit in bytes or
|
|
||||||
* -1 for unlimited
|
|
||||||
* OSCONTAINER_ERROR for not supported
|
|
||||||
*/
|
|
||||||
jlong OSContainer::memory_limit_in_bytes() {
|
jlong OSContainer::memory_limit_in_bytes() {
|
||||||
if (!memory->cache_has_expired()) {
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
return memory->memory_limit_in_bytes();
|
return cgroup_subsystem->memory_limit_in_bytes();
|
||||||
}
|
|
||||||
jlong memory_limit = read_memory_limit_in_bytes();
|
|
||||||
// Update CgroupMemorySubsystem to avoid re-reading container settings too often
|
|
||||||
memory->set_memory_limit_in_bytes(memory_limit);
|
|
||||||
return memory_limit;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* read_memory_limit_in_bytes
 *
 * Reads "/memory.limit_in_bytes" of the memory controller. When that value
 * is at or above _unlimited_memory and the controller is hierarchical, the
 * "hierarchical_memory_limit" entry of "/memory.stat" is consulted as well.
 *
 * return:
 *    memory limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR if a required file could not be read
 */
jlong OSContainer::read_memory_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes",
                     "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);

  // A concrete limit on this cgroup wins outright.
  if (memlimit < _unlimited_memory) {
    return (jlong)memlimit;
  }

  log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
  if (!memory->is_hierarchical()) {
    return (jlong)-1;
  }

  // The limit may have been set via the cgroup hierarchy instead.
  const char* stat_key = "hierarchical_memory_limit";
  const char* stat_fmt = "%s " JULONG_FORMAT;
  GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", stat_key,
                          "Hierarchical Memory Limit is: " JULONG_FORMAT, stat_fmt, hier_memlimit)
  if (hier_memlimit < _unlimited_memory) {
    return (jlong)hier_memlimit;
  }

  log_trace(os, container)("Hierarchical Memory Limit is: Unlimited");
  return (jlong)-1;
}
|
}
|
||||||
|
|
||||||
jlong OSContainer::memory_and_swap_limit_in_bytes() {
|
jlong OSContainer::memory_and_swap_limit_in_bytes() {
|
||||||
GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);
|
return cgroup_subsystem->memory_and_swap_limit_in_bytes();
|
||||||
if (memswlimit >= _unlimited_memory) {
|
|
||||||
log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited");
|
|
||||||
if (memory->is_hierarchical()) {
|
|
||||||
const char* matchline = "hierarchical_memsw_limit";
|
|
||||||
const char* format = "%s " JULONG_FORMAT;
|
|
||||||
GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,
|
|
||||||
"Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memlimit)
|
|
||||||
if (hier_memlimit >= _unlimited_memory) {
|
|
||||||
log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited");
|
|
||||||
} else {
|
|
||||||
return (jlong)hier_memlimit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (jlong)-1;
|
|
||||||
} else {
|
|
||||||
return (jlong)memswlimit;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
jlong OSContainer::memory_soft_limit_in_bytes() {
|
jlong OSContainer::memory_soft_limit_in_bytes() {
|
||||||
GET_CONTAINER_INFO(julong, memory, "/memory.soft_limit_in_bytes",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit);
|
return cgroup_subsystem->memory_soft_limit_in_bytes();
|
||||||
if (memsoftlimit >= _unlimited_memory) {
|
|
||||||
log_trace(os, container)("Memory Soft Limit is: Unlimited");
|
|
||||||
return (jlong)-1;
|
|
||||||
} else {
|
|
||||||
return (jlong)memsoftlimit;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* memory_usage_in_bytes
|
|
||||||
*
|
|
||||||
* Return the amount of used memory for this process.
|
|
||||||
*
|
|
||||||
* return:
|
|
||||||
* memory usage in bytes or
|
|
||||||
* -1 for unlimited
|
|
||||||
* OSCONTAINER_ERROR for not supported
|
|
||||||
*/
|
|
||||||
jlong OSContainer::memory_usage_in_bytes() {
|
jlong OSContainer::memory_usage_in_bytes() {
|
||||||
GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);
|
return cgroup_subsystem->memory_usage_in_bytes();
|
||||||
return memusage;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* memory_max_usage_in_bytes
|
|
||||||
*
|
|
||||||
* Return the maximum amount of used memory for this process.
|
|
||||||
*
|
|
||||||
* return:
|
|
||||||
* max memory usage in bytes or
|
|
||||||
* OSCONTAINER_ERROR for not supported
|
|
||||||
*/
|
|
||||||
jlong OSContainer::memory_max_usage_in_bytes() {
|
jlong OSContainer::memory_max_usage_in_bytes() {
|
||||||
GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage);
|
return cgroup_subsystem->memory_max_usage_in_bytes();
|
||||||
return memmaxusage;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* active_processor_count
|
|
||||||
*
|
|
||||||
* Calculate an appropriate number of active processors for the
|
|
||||||
* VM to use based on these three inputs.
|
|
||||||
*
|
|
||||||
* cpu affinity
|
|
||||||
* cgroup cpu quota & cpu period
|
|
||||||
* cgroup cpu shares
|
|
||||||
*
|
|
||||||
* Algorithm:
|
|
||||||
*
|
|
||||||
* Determine the number of available CPUs from sched_getaffinity
|
|
||||||
*
|
|
||||||
* If user specified a quota (quota != -1), calculate the number of
|
|
||||||
* required CPUs by dividing quota by period.
|
|
||||||
*
|
|
||||||
* If shares are in effect (shares != -1), calculate the number
|
|
||||||
* of CPUs required for the shares by dividing the share value
|
|
||||||
* by PER_CPU_SHARES.
|
|
||||||
*
|
|
||||||
* All results of division are rounded up to the next whole number.
|
|
||||||
*
|
|
||||||
* If neither shares or quotas have been specified, return the
|
|
||||||
* number of active processors in the system.
|
|
||||||
*
|
|
||||||
* If both shares and quotas have been specified, the results are
|
|
||||||
* based on the flag PreferContainerQuotaForCPUCount. If true,
|
|
||||||
* return the quota value. If false return the smallest value
|
|
||||||
* between shares or quotas.
|
|
||||||
*
|
|
||||||
* If shares and/or quotas have been specified, the resulting number
|
|
||||||
* returned will never exceed the number of active processors.
|
|
||||||
*
|
|
||||||
* return:
|
|
||||||
* number of CPUs
|
|
||||||
*/
|
|
||||||
int OSContainer::active_processor_count() {
|
|
||||||
int quota_count = 0, share_count = 0;
|
|
||||||
int cpu_count, limit_count;
|
|
||||||
int result;
|
|
||||||
|
|
||||||
// We use a cache with a timeout to avoid performing expensive
|
|
||||||
// computations in the event this function is called frequently.
|
|
||||||
// [See 8227006].
|
|
||||||
if (!cpu->cache_has_expired()) {
|
|
||||||
log_trace(os, container)("OSContainer::active_processor_count (cached): %d", OSContainer::_active_processor_count);
|
|
||||||
return OSContainer::_active_processor_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
cpu_count = limit_count = os::Linux::active_processor_count();
|
|
||||||
int quota = cpu_quota();
|
|
||||||
int period = cpu_period();
|
|
||||||
int share = cpu_shares();
|
|
||||||
|
|
||||||
if (quota > -1 && period > 0) {
|
|
||||||
quota_count = ceilf((float)quota / (float)period);
|
|
||||||
log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count);
|
|
||||||
}
|
|
||||||
if (share > -1) {
|
|
||||||
share_count = ceilf((float)share / (float)PER_CPU_SHARES);
|
|
||||||
log_trace(os, container)("CPU Share count based on shares: %d", share_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If both shares and quotas are setup results depend
|
|
||||||
// on flag PreferContainerQuotaForCPUCount.
|
|
||||||
// If true, limit CPU count to quota
|
|
||||||
// If false, use minimum of shares and quotas
|
|
||||||
if (quota_count !=0 && share_count != 0) {
|
|
||||||
if (PreferContainerQuotaForCPUCount) {
|
|
||||||
limit_count = quota_count;
|
|
||||||
} else {
|
|
||||||
limit_count = MIN2(quota_count, share_count);
|
|
||||||
}
|
|
||||||
} else if (quota_count != 0) {
|
|
||||||
limit_count = quota_count;
|
|
||||||
} else if (share_count != 0) {
|
|
||||||
limit_count = share_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = MIN2(cpu_count, limit_count);
|
|
||||||
log_trace(os, container)("OSContainer::active_processor_count: %d", result);
|
|
||||||
|
|
||||||
// Update the value and reset the cache timeout
|
|
||||||
OSContainer::_active_processor_count = result;
|
|
||||||
cpu->set_cache_expiry_time(OSCONTAINER_CACHE_TIMEOUT);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char * OSContainer::cpu_cpuset_cpus() {
|
char * OSContainer::cpu_cpuset_cpus() {
|
||||||
GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"cpuset.cpus is: %s", "%1023s", cpus, 1024);
|
return cgroup_subsystem->cpu_cpuset_cpus();
|
||||||
return os::strdup(cpus);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char * OSContainer::cpu_cpuset_memory_nodes() {
|
char * OSContainer::cpu_cpuset_memory_nodes() {
|
||||||
GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"cpuset.mems is: %s", "%1023s", mems, 1024);
|
return cgroup_subsystem->cpu_cpuset_memory_nodes();
|
||||||
return os::strdup(mems);
|
}
|
||||||
|
|
||||||
|
int OSContainer::active_processor_count() {
|
||||||
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
|
return cgroup_subsystem->active_processor_count();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* cpu_quota
|
|
||||||
*
|
|
||||||
* Return the number of milliseconds per period
|
|
||||||
* process is guaranteed to run.
|
|
||||||
*
|
|
||||||
* return:
|
|
||||||
* quota time in milliseconds
|
|
||||||
* -1 for no quota
|
|
||||||
* OSCONTAINER_ERROR for not supported
|
|
||||||
*/
|
|
||||||
int OSContainer::cpu_quota() {
|
int OSContainer::cpu_quota() {
|
||||||
GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"CPU Quota is: %d", "%d", quota);
|
return cgroup_subsystem->cpu_quota();
|
||||||
return quota;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int OSContainer::cpu_period() {
|
int OSContainer::cpu_period() {
|
||||||
GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"CPU Period is: %d", "%d", period);
|
return cgroup_subsystem->cpu_period();
|
||||||
return period;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* cpu_shares
|
|
||||||
*
|
|
||||||
* Return the amount of cpu shares available to the process
|
|
||||||
*
|
|
||||||
* return:
|
|
||||||
* Share number (typically a number relative to 1024)
|
|
||||||
* (2048 typically expresses 2 CPUs worth of processing)
|
|
||||||
* -1 for no share setup
|
|
||||||
* OSCONTAINER_ERROR for not supported
|
|
||||||
*/
|
|
||||||
int OSContainer::cpu_shares() {
|
int OSContainer::cpu_shares() {
|
||||||
GET_CONTAINER_INFO(int, cpu, "/cpu.shares",
|
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
|
||||||
"CPU Shares is: %d", "%d", shares);
|
return cgroup_subsystem->cpu_shares();
|
||||||
// Convert 1024 to no shares setup
|
|
||||||
if (shares == 1024) return -1;
|
|
||||||
|
|
||||||
return shares;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -48,7 +48,6 @@ class OSContainer: AllStatic {
|
||||||
static inline bool is_containerized();
|
static inline bool is_containerized();
|
||||||
static const char * container_type();
|
static const char * container_type();
|
||||||
|
|
||||||
static jlong uses_mem_hierarchy();
|
|
||||||
static jlong memory_limit_in_bytes();
|
static jlong memory_limit_in_bytes();
|
||||||
static jlong memory_and_swap_limit_in_bytes();
|
static jlong memory_and_swap_limit_in_bytes();
|
||||||
static jlong memory_soft_limit_in_bytes();
|
static jlong memory_soft_limit_in_bytes();
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -31,6 +31,7 @@
|
||||||
static bool zero_page_read_protected() { return true; }
|
static bool zero_page_read_protected() { return true; }
|
||||||
|
|
||||||
class Linux {
|
class Linux {
|
||||||
|
friend class CgroupSubsystem;
|
||||||
friend class os;
|
friend class os;
|
||||||
friend class OSContainer;
|
friend class OSContainer;
|
||||||
friend class TestReserveMemorySpecial;
|
friend class TestReserveMemorySpecial;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -34,6 +34,7 @@
|
||||||
* @run driver TestCPUAwareness
|
* @run driver TestCPUAwareness
|
||||||
*/
|
*/
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import jdk.test.lib.process.OutputAnalyzer;
|
||||||
import jdk.test.lib.containers.docker.Common;
|
import jdk.test.lib.containers.docker.Common;
|
||||||
import jdk.test.lib.containers.docker.DockerRunOptions;
|
import jdk.test.lib.containers.docker.DockerRunOptions;
|
||||||
import jdk.test.lib.containers.docker.DockerTestUtils;
|
import jdk.test.lib.containers.docker.DockerTestUtils;
|
||||||
|
@ -213,9 +214,21 @@ public class TestCPUAwareness {
|
||||||
|
|
||||||
DockerRunOptions opts = Common.newOpts(imageName)
|
DockerRunOptions opts = Common.newOpts(imageName)
|
||||||
.addDockerOpts("--cpu-shares=" + shares);
|
.addDockerOpts("--cpu-shares=" + shares);
|
||||||
Common.run(opts)
|
OutputAnalyzer out = Common.run(opts);
|
||||||
.shouldMatch("CPU Shares is.*" + shares)
|
// Cgroups v2 needs to do some scaling of raw shares values. Hence,
|
||||||
.shouldMatch("active_processor_count.*" + expectedAPC);
|
// 256 CPU shares come back as 264. Raw value written to cpu.weight
|
||||||
|
// is 10. The reason this works for >= 1024 shares value is because
|
||||||
|
// post-scaling the closest multiple of 1024 is found and returned.
|
||||||
|
//
|
||||||
|
// For values < 1024, this doesn't happen so loosen the match to a
|
||||||
|
// 3-digit number and ensure the active_processor_count is as
|
||||||
|
// expected.
|
||||||
|
if (shares < 1024) {
|
||||||
|
out.shouldMatch("CPU Shares is.*\\d{3}");
|
||||||
|
} else {
|
||||||
|
out.shouldMatch("CPU Shares is.*" + shares);
|
||||||
|
}
|
||||||
|
out.shouldMatch("active_processor_count.*" + expectedAPC);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void testOperatingSystemMXBeanAwareness(String cpuAllocation, String expectedCpus) throws Exception {
|
private static void testOperatingSystemMXBeanAwareness(String cpuAllocation, String expectedCpus) throws Exception {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue