/*
 * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include <string.h>
#include <math.h>
#include <errno.h>
#include <sys/vfs.h>
#include "cgroupSubsystem_linux.hpp"
#include "cgroupV1Subsystem_linux.hpp"
#include "cgroupV2Subsystem_linux.hpp"
#include "cgroupUtil_linux.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "os_linux.hpp"
#include "runtime/globals.hpp"
#include "runtime/os.hpp"
#include "utilities/globalDefinitions.hpp"

// Inlined from <linux/magic.h> for portability.
#ifndef CGROUP2_SUPER_MAGIC
#  define CGROUP2_SUPER_MAGIC 0x63677270
#endif

// controller names have to match the *_IDX indices
static const char* cg_controller_name[] = { "cpuset", "cpu", "cpuacct", "memory", "pids" };
static inline int cg_v2_controller_index(const char* name) {
  if (strcmp(name, "cpuset") == 0) {
    return CPUSET_IDX;
  } else if (strcmp(name, "cpu") == 0) {
    return CPU_IDX;
  } else if (strcmp(name, "memory") == 0) {
    return MEMORY_IDX;
  } else if (strcmp(name, "pids") == 0) {
    return PIDS_IDX;
  } else {
    return -1;
  }
}

CgroupSubsystem* CgroupSubsystemFactory::create() {
  CgroupV1MemoryController* memory = nullptr;
  CgroupV1Controller* cpuset = nullptr;
  CgroupV1CpuController* cpu = nullptr;
  CgroupV1CpuacctController* cpuacct = nullptr;
  CgroupV1Controller* pids = nullptr;
  CgroupInfo cg_infos[CG_INFO_LENGTH];
  u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
  const char* proc_cgroups = "/proc/cgroups";
  const char* sys_fs_cgroup_cgroup_controllers = "/sys/fs/cgroup/cgroup.controllers";
  const char* controllers_file = proc_cgroups;
  const char* proc_self_cgroup = "/proc/self/cgroup";
  const char* proc_self_mountinfo = "/proc/self/mountinfo";
  const char* sys_fs_cgroup = "/sys/fs/cgroup";
  struct statfs fsstat = {};
  bool cgroups_v2_enabled = false;

  // Assume cgroups v2 is usable by the JDK iff /sys/fs/cgroup has the cgroup v2
  // file system magic.  If it does not then heuristics are required to determine
  // if cgroups v1 is usable or not.
  if (statfs(sys_fs_cgroup, &fsstat) != -1) {
    cgroups_v2_enabled = (fsstat.f_type == CGROUP2_SUPER_MAGIC);
    if (cgroups_v2_enabled) {
      controllers_file = sys_fs_cgroup_cgroup_controllers;
    }
  }

  bool valid_cgroup = determine_type(cg_infos, cgroups_v2_enabled, controllers_file, proc_self_cgroup, proc_self_mountinfo, &cg_type_flags);

  if (!valid_cgroup) {
    // Could not detect cgroup type
    return nullptr;
  }
  assert(is_valid_cgroup(&cg_type_flags), "Expected valid cgroup type");

  if (is_cgroup_v2(&cg_type_flags)) {
    // Cgroups v2 case, we have all the info we need.
    // Construct the subsystem, free resources and return
    // Note: We use the memory for non-cpu non-memory controller look-ups.
    //       Perhaps we ought to have separate controllers for all.
    CgroupV2Controller mem_other = CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path,
                                                      cg_infos[MEMORY_IDX]._cgroup_path,
                                                      cg_infos[MEMORY_IDX]._read_only);
    CgroupV2MemoryController* memory = new CgroupV2MemoryController(mem_other);
    CgroupV2CpuController* cpu = new CgroupV2CpuController(CgroupV2Controller(cg_infos[CPU_IDX]._mount_path,
                                                                              cg_infos[CPU_IDX]._cgroup_path,
                                                                              cg_infos[CPU_IDX]._read_only));
    CgroupV2CpuacctController* cpuacct = new CgroupV2CpuacctController(cpu);
    log_debug(os, container)("Detected cgroups v2 unified hierarchy");
    cleanup(cg_infos);
    return new CgroupV2Subsystem(memory, cpu, cpuacct, mem_other);
  }

  /*
   * Cgroup v1 case:
   *
   * Use info gathered previously from /proc/self/cgroup
   * and map host mount point to
   * local one via /proc/self/mountinfo content above
   *
   * Docker example:
   * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
   *
   * Host example:
   * 5:memory:/user.slice
   *
   * Construct a path to the process specific memory and cpuset
   * cgroup directory.
   *
   * For a container running under Docker from memory example above
   * the paths would be:
   *
   * /sys/fs/cgroup/memory
   *
   * For a Host from memory example above the path would be:
   *
   * /sys/fs/cgroup/memory/user.slice
   *
   */
  assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected");
  for (int i = 0; i < CG_INFO_LENGTH; i++) {
    CgroupInfo info = cg_infos[i];
    if (info._data_complete) { // pids controller might have incomplete data
      if (strcmp(info._name, "memory") == 0) {
        memory = new CgroupV1MemoryController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only));
        memory->set_subsystem_path(info._cgroup_path);
      } else if (strcmp(info._name, "cpuset") == 0) {
        cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only);
        cpuset->set_subsystem_path(info._cgroup_path);
      } else if (strcmp(info._name, "cpu") == 0) {
        cpu = new CgroupV1CpuController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only));
        cpu->set_subsystem_path(info._cgroup_path);
      } else if (strcmp(info._name, "cpuacct") == 0) {
        cpuacct = new CgroupV1CpuacctController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only));
        cpuacct->set_subsystem_path(info._cgroup_path);
      } else if (strcmp(info._name, "pids") == 0) {
        pids = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only);
        pids->set_subsystem_path(info._cgroup_path);
      }
    } else {
      log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]);
    }
  }
  cleanup(cg_infos);
  return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory);
}

void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos,
                                                  int controller,
                                                  const char* name,
                                                  char* mount_path,
                                                  char* root_path,
                                                  bool read_only) {
  if (cg_infos[controller]._mount_path != nullptr) {
    // On some systems duplicate controllers get mounted in addition to
    // the main cgroup controllers most likely under /sys/fs/cgroup. In that
    // case pick the one under /sys/fs/cgroup and discard others.
    if (strstr(cg_infos[controller]._mount_path, "/sys/fs/cgroup") != cg_infos[controller]._mount_path) {
      log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.",
                               name, mount_path, cg_infos[controller]._mount_path);
      os::free(cg_infos[controller]._mount_path);
      os::free(cg_infos[controller]._root_mount_path);
      cg_infos[controller]._mount_path = os::strdup(mount_path);
      cg_infos[controller]._root_mount_path = os::strdup(root_path);
      cg_infos[controller]._read_only = read_only;
    } else {
      log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.",
                               name, cg_infos[controller]._mount_path, mount_path);
    }
  } else {
    cg_infos[controller]._mount_path = os::strdup(mount_path);
    cg_infos[controller]._root_mount_path = os::strdup(root_path);
    cg_infos[controller]._read_only = read_only;
  }
}

/*
 * Determine whether or not the mount options, which are comma separated,
 * contain the 'ro' string.
 */
static bool find_ro_opt(char* mount_opts) {
  char* token;
  char* mo_ptr = mount_opts;
  // mount options are comma-separated (man proc).
  while ((token = strsep(&mo_ptr, ",")) != nullptr) {
    if (strcmp(token, "ro") == 0) {
      return true;
    }
  }
  return false;
}

/*
 * Read values of a /proc/self/mountinfo line into variables. For cgroups v1
 * super options are needed. On cgroups v2 super options are not used.
 *
 * The scanning of a single mountinfo line entry is as follows:
 *
 * 36  35  98:0      /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
 * (1) (2) (3):(4)   (5)   (6)      (7)      (8)   (9) (10)   (11)         (12)
 *
 * The numbers in parentheses are labels for the descriptions below:
 *
 *  (1)   mount ID:        matched with '%*d' and discarded
 *  (2)   parent ID:       matched with '%*d' and discarded
 *  (3)   major:           ---,---> major, minor separated by ':'. matched with '%*d:%*d' and discarded
 *  (4)   minor:           ---'
 *  (5)   root:            matched with '%s' and captured in 'tmproot'. Must be non-empty.
 *  (6)   mount point:     matched with '%s' and captured in 'tmpmount'. Must be non-empty.
 *  (7)   mount options:   matched with '%s' and captured in 'mount_opts'. Must be non-empty.
 *  (8)   optional fields: ---,---> matched with '%*[^-]-'. Anything not a hyphen, followed by a hyphen
 *  (9)   separator:       ---'     and discarded. Note: The discarded match is space characters if there
 *                                  are no optionals. Otherwise it includes the optional fields as well.
 * (10)   filesystem type: matched with '%s' and captured in 'tmp_fs_type'
 * (11)   mount source:    matched with '%*s' and discarded
 * (12)   super options:   matched with '%s' and captured in 'tmpcgroups'
 */
static inline bool match_mount_info_line(char* line,
                                         char* tmproot,
                                         char* tmpmount,
                                         char* mount_opts,
                                         char* tmp_fs_type,
                                         char* tmpcgroups) {
 return sscanf(line,
               "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %s",
               tmproot,
               tmpmount,
               mount_opts,
               tmp_fs_type,
               tmpcgroups) == 5;
}

bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
                                            bool cgroups_v2_enabled,
                                            const char* controllers_file,
                                            const char* proc_self_cgroup,
                                            const char* proc_self_mountinfo,
                                            u1* flags) {
  FILE *mntinfo = nullptr;
  FILE* controllers = nullptr;
  FILE *cgroup = nullptr;
  char buf[MAXPATHLEN+1];
  char *p;
  // true iff all required controllers, memory, cpu, cpuacct are enabled
  // at the kernel level.
  // pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1)
  // cpuset might not be enabled on newer Linux distros (Fedora 41)
  bool all_required_controllers_enabled = true;

  // If cgroups v2 is enabled, open /sys/fs/cgroup/cgroup.controllers.  If not, open /proc/cgroups.
  controllers = os::fopen(controllers_file, "r");
  if (controllers == nullptr) {
    log_debug(os, container)("Can't open %s, %s", controllers_file, os::strerror(errno));
    *flags = INVALID_CGROUPS_GENERIC;
    return false;
  }

  if (cgroups_v2_enabled) {
    /*
     * cgroups v2 is enabled.  For cgroups v2 (unified hierarchy), the cpu and memory
     * controllers must be enabled.
     */
    if ((p = fgets(buf, MAXPATHLEN, controllers)) != nullptr) {
      char* controller = nullptr;
      #define ISSPACE_CHARS " \n\t\r\f\v"
      while ((controller = strsep(&p, ISSPACE_CHARS)) != nullptr) {
        int i;
        if ((i = cg_v2_controller_index(controller)) != -1) {
          cg_infos[i]._name = os::strdup(controller);
          cg_infos[i]._enabled = true;
          if (i == PIDS_IDX || i == CPUSET_IDX) {
            log_debug(os, container)("Detected optional %s controller entry in %s",
                                     controller, controllers_file);
          }
        }
      }
      #undef ISSPACE_CHARS
    } else {
      log_debug(os, container)("Can't read %s, %s", controllers_file, os::strerror(errno));
      *flags = INVALID_CGROUPS_V2;
      fclose(controllers);
      return false;
    }
    for (int i = 0; i < CG_INFO_LENGTH; i++) {
      // cgroups v2 does not have cpuacct.
      if (i == CPUACCT_IDX) {
        continue;
      }
      // For cgroups v2, cpuacct is rolled into cpu, and the pids and cpuset controllers
      // are optional; the remaining controllers, cpu and memory, are required.
      if (i == CPU_IDX || i == MEMORY_IDX) {
        all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
      }
      if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
        log_debug(os, container)("controller %s is not enabled", cg_controller_name[i]);
      }
    }
  } else {
    /*
     * The /sys/fs/cgroup filesystem magic hint suggests we have cg v1.  Read /proc/cgroups; for
     * cgroups v1 hierarchy (hybrid or legacy), cpu, cpuacct, cpuset, and memory controllers must
     * have non-zero for the hierarchy ID field and relevant controllers mounted.
     */
    while ((p = fgets(buf, MAXPATHLEN, controllers)) != nullptr) {
      char name[MAXPATHLEN+1];
      int  hierarchy_id;
      int  enabled;

      // Format of /proc/cgroups documented via man 7 cgroups
      if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
        continue;
      }
      if (strcmp(name, "memory") == 0) {
        cg_infos[MEMORY_IDX]._name = os::strdup(name);
        cg_infos[MEMORY_IDX]._hierarchy_id = hierarchy_id;
        cg_infos[MEMORY_IDX]._enabled = (enabled == 1);
      } else if (strcmp(name, "cpuset") == 0) {
        cg_infos[CPUSET_IDX]._name = os::strdup(name);
        cg_infos[CPUSET_IDX]._hierarchy_id = hierarchy_id;
        cg_infos[CPUSET_IDX]._enabled = (enabled == 1);
      } else if (strcmp(name, "cpu") == 0) {
        cg_infos[CPU_IDX]._name = os::strdup(name);
        cg_infos[CPU_IDX]._hierarchy_id = hierarchy_id;
        cg_infos[CPU_IDX]._enabled = (enabled == 1);
      } else if (strcmp(name, "cpuacct") == 0) {
        cg_infos[CPUACCT_IDX]._name = os::strdup(name);
        cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
        cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
      } else if (strcmp(name, "pids") == 0) {
        log_debug(os, container)("Detected optional pids controller entry in %s", controllers_file);
        cg_infos[PIDS_IDX]._name = os::strdup(name);
        cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
        cg_infos[PIDS_IDX]._enabled = (enabled == 1);
      }
    }
    for (int i = 0; i < CG_INFO_LENGTH; i++) {
      // pids controller is optional. All other controllers are required
      if (i != PIDS_IDX) {
        all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
      }
      if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
        log_debug(os, container)("controller %s is not enabled", cg_controller_name[i]);
      }
    }
  }
  fclose(controllers);

  if (!all_required_controllers_enabled) {
    // one or more required controllers disabled, disable container support
    log_debug(os, container)("One or more required controllers disabled at kernel level.");
    cleanup(cg_infos);
    *flags = INVALID_CGROUPS_GENERIC;
    return false;
  }

  /*
   * Read /proc/self/cgroup and determine:
   *  - the cgroup path for cgroups v2 or
   *  - on a cgroups v1 system, collect info for mapping
   *    the host mount point to the local one via /proc/self/mountinfo below.
   */
  cgroup = os::fopen(proc_self_cgroup, "r");
  if (cgroup == nullptr) {
    log_debug(os, container)("Can't open %s, %s",
                             proc_self_cgroup, os::strerror(errno));
    cleanup(cg_infos);
    *flags = INVALID_CGROUPS_GENERIC;
    return false;
  }

  while ((p = fgets(buf, MAXPATHLEN, cgroup)) != nullptr) {
    char *controllers;
    char *token;
    char *hierarchy_id_str;
    int  hierarchy_id;
    char *cgroup_path;

    hierarchy_id_str = strsep(&p, ":");
    hierarchy_id = atoi(hierarchy_id_str);
    /* Get controllers and base */
    controllers = strsep(&p, ":");
    cgroup_path = strsep(&p, "\n");

    if (controllers == nullptr) {
      continue;
    }

    while (!cgroups_v2_enabled && (token = strsep(&controllers, ",")) != nullptr) {
      if (strcmp(token, "memory") == 0) {
        assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
        cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
      } else if (strcmp(token, "cpuset") == 0) {
        assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset");
        cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path);
      } else if (strcmp(token, "cpu") == 0) {
        assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
        cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
      } else if (strcmp(token, "cpuacct") == 0) {
        assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacct");
        cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
      } else if (strcmp(token, "pids") == 0) {
        assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
                                                                 cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
        cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
      }
    }
    if (cgroups_v2_enabled) {
      // On some systems we have mixed cgroups v1 and cgroups v2 controllers (e.g. freezer on cg1 and
      // all relevant controllers on cg2). Only set the cgroup path when we see a hierarchy id of 0.
      if (hierarchy_id != 0) {
        continue;
      }
      for (int i = 0; i < CG_INFO_LENGTH; i++) {
        assert(cg_infos[i]._cgroup_path == nullptr, "cgroup path must only be set once");
        cg_infos[i]._cgroup_path = os::strdup(cgroup_path);
      }
    }
  }
  fclose(cgroup);

  // Find various mount points by reading /proc/self/mountinfo
  // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
  mntinfo = os::fopen(proc_self_mountinfo, "r");
  if (mntinfo == nullptr) {
      log_debug(os, container)("Can't open %s, %s",
                               proc_self_mountinfo, os::strerror(errno));
      cleanup(cg_infos);
      *flags = INVALID_CGROUPS_GENERIC;
      return false;
  }

  bool cgroupv2_mount_point_found = false;
  bool any_cgroup_mounts_found = false;
  while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != nullptr) {
    char tmp_fs_type[MAXPATHLEN+1];
    char tmproot[MAXPATHLEN+1];
    char tmpmount[MAXPATHLEN+1];
    char tmpcgroups[MAXPATHLEN+1];
    char mount_opts[MAXPATHLEN+1];
    char *cptr = tmpcgroups;
    char *token;

    /* Cgroup v2 relevant info. We only look for the _mount_path iff cgroups_v2_enabled so
     * as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1
     * block in the hybrid case.
     *
     * We collect the read only mount option in the cgroup infos so as to have that
     * info ready when determining is_containerized().
     */
    if (cgroups_v2_enabled && match_mount_info_line(p,
                                              tmproot,
                                              tmpmount,
                                              mount_opts,
                                              tmp_fs_type,
                                              tmpcgroups /* unused */)) {
      // we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype
      if (strcmp("cgroup2", tmp_fs_type) == 0) {
        cgroupv2_mount_point_found = true;
        any_cgroup_mounts_found = true;
        // For unified we only have a single line with cgroup2 fs type.
        // Therefore use that option for all CG info structs.
        bool ro_option = find_ro_opt(mount_opts);
        for (int i = 0; i < CG_INFO_LENGTH; i++) {
          set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot, ro_option);
        }
      }
    }

    /* Cgroup v1 relevant info
     *
     * Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids. For each controller
     * determine whether or not they show up as mounted read only or not.
     *
     * Example for docker:
     * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
     *
     * Example for host:
     * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
     *
     * 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
     *
     */
    if (match_mount_info_line(p, tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups)) {
      if (strcmp("cgroup", tmp_fs_type) != 0) {
        // Skip cgroup2 fs lines on hybrid or unified hierarchy.
        continue;
      }
      while ((token = strsep(&cptr, ",")) != nullptr) {
        if (strcmp(token, "memory") == 0) {
          any_cgroup_mounts_found = true;
          bool ro_option = find_ro_opt(mount_opts);
          set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot, ro_option);
          cg_infos[MEMORY_IDX]._data_complete = true;
        } else if (strcmp(token, "cpuset") == 0) {
          any_cgroup_mounts_found = true;
          bool ro_option = find_ro_opt(mount_opts);
          set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot, ro_option);
          cg_infos[CPUSET_IDX]._data_complete = true;
        } else if (strcmp(token, "cpu") == 0) {
          any_cgroup_mounts_found = true;
          bool ro_option = find_ro_opt(mount_opts);
          set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot, ro_option);
          cg_infos[CPU_IDX]._data_complete = true;
        } else if (strcmp(token, "cpuacct") == 0) {
          any_cgroup_mounts_found = true;
          bool ro_option = find_ro_opt(mount_opts);
          set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot, ro_option);
          cg_infos[CPUACCT_IDX]._data_complete = true;
        } else if (strcmp(token, "pids") == 0) {
          any_cgroup_mounts_found = true;
          bool ro_option = find_ro_opt(mount_opts);
          set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot, ro_option);
          cg_infos[PIDS_IDX]._data_complete = true;
        }
      }
    }
  }
  fclose(mntinfo);

  // Neither cgroup2 nor cgroup filesystems mounted via /proc/self/mountinfo
  // No point in continuing.
  if (!any_cgroup_mounts_found) {
    log_trace(os, container)("No relevant cgroup controllers mounted.");
    cleanup(cg_infos);
    *flags = INVALID_CGROUPS_NO_MOUNT;
    return false;
  }

  if (cgroups_v2_enabled) {
    if (!cgroupv2_mount_point_found) {
      log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
      cleanup(cg_infos);
      *flags = INVALID_CGROUPS_V2;
      return false;
    }
    // Cgroups v2 case, we have all the info we need.
    *flags = CGROUPS_V2;
    return true;
  }

  // What follows is cgroups v1
  log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers");

  if (!cg_infos[MEMORY_IDX]._data_complete) {
    log_debug(os, container)("Required cgroup v1 memory subsystem not found");
    cleanup(cg_infos);
    *flags = INVALID_CGROUPS_V1;
    return false;
  }
  if (!cg_infos[CPUSET_IDX]._data_complete) {
    log_debug(os, container)("Required cgroup v1 cpuset subsystem not found");
    cleanup(cg_infos);
    *flags = INVALID_CGROUPS_V1;
    return false;
  }
  if (!cg_infos[CPU_IDX]._data_complete) {
    log_debug(os, container)("Required cgroup v1 cpu subsystem not found");
    cleanup(cg_infos);
    *flags = INVALID_CGROUPS_V1;
    return false;
  }
  if (!cg_infos[CPUACCT_IDX]._data_complete) {
    log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found");
    cleanup(cg_infos);
    *flags = INVALID_CGROUPS_V1;
    return false;
  }
  if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) {
    log_debug(os, container)("Optional cgroup v1 pids subsystem not found");
    // keep the other controller info, pids is optional
  }
  // Cgroups v1 case, we have all the info we need.
  *flags = CGROUPS_V1;
  return true;
};

void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
  assert(cg_infos != nullptr, "Invariant");
  for (int i = 0; i < CG_INFO_LENGTH; i++) {
    os::free(cg_infos[i]._name);
    os::free(cg_infos[i]._cgroup_path);
    os::free(cg_infos[i]._root_mount_path);
    os::free(cg_infos[i]._mount_path);
  }
}

/* active_processor_count
 *
 * Calculate an appropriate number of active processors for the
 * VM to use based on these three inputs.
 *
 * cpu affinity
 * cgroup cpu quota & cpu period
 * cgroup cpu shares
 *
 * Algorithm:
 *
 * Determine the number of available CPUs from sched_getaffinity
 *
 * If user specified a quota (quota != -1), calculate the number of
 * required CPUs by dividing quota by period.
 *
 * All results of division are rounded up to the next whole number.
 *
 * If quotas have not been specified, return the
 * number of active processors in the system.
 *
 * If quotas have been specified, the resulting number
 * returned will never exceed the number of active processors.
 *
 * return:
 *    number of CPUs
 */
int CgroupSubsystem::active_processor_count() {
  int quota_count = 0;
  int cpu_count;
  int result;

  // We use a cache with a timeout to avoid performing expensive
  // computations in the event this function is called frequently.
  // [See 8227006].
  CachingCgroupController<CgroupCpuController>* contrl = cpu_controller();
  CachedMetric* cpu_limit = contrl->metrics_cache();
  if (!cpu_limit->should_check_metric()) {
    int val = (int)cpu_limit->value();
    log_trace(os, container)("CgroupSubsystem::active_processor_count (cached): %d", val);
    return val;
  }

  cpu_count = os::Linux::active_processor_count();
  result = CgroupUtil::processor_count(contrl->controller(), cpu_count);
  // Update cached metric to avoid re-reading container settings too often
  cpu_limit->set_value(result, OSCONTAINER_CACHE_TIMEOUT);

  return result;
}

/* memory_limit_in_bytes
 *
 * Return the limit of available memory for this process.
 *
 * return:
 *    memory limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR for not supported
 */
jlong CgroupSubsystem::memory_limit_in_bytes() {
  CachingCgroupController<CgroupMemoryController>* contrl = memory_controller();
  CachedMetric* memory_limit = contrl->metrics_cache();
  if (!memory_limit->should_check_metric()) {
    return memory_limit->value();
  }
  jlong phys_mem = os::Linux::physical_memory();
  log_trace(os, container)("total physical memory: " JLONG_FORMAT, phys_mem);
  jlong mem_limit = contrl->controller()->read_memory_limit_in_bytes(phys_mem);
  // Update cached metric to avoid re-reading container settings too often
  memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
  return mem_limit;
}

bool CgroupController::read_string(const char* filename, char* buf, size_t buf_size) {
  assert(buf != nullptr, "buffer must not be null");
  assert(filename != nullptr, "filename must be given");
  const char* s_path = subsystem_path();
  if (s_path == nullptr) {
    log_debug(os, container)("read_string: subsystem path is null");
    return false;
  }

  stringStream file_path;
  file_path.print_raw(s_path);
  file_path.print_raw(filename);

  if (file_path.size() > MAXPATHLEN) {
    log_debug(os, container)("File path too long %s, %s", file_path.base(), filename);
    return false;
  }
  const char* absolute_path = file_path.freeze();
  log_trace(os, container)("Path to %s is %s", filename, absolute_path);

  FILE* fp = os::fopen(absolute_path, "r");
  if (fp == nullptr) {
    log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno));
    return false;
  }

  // Read a single line into the provided buffer.
  // At most buf_size - 1 characters.
  char* line = fgets(buf, buf_size, fp);
  fclose(fp);
  if (line == nullptr) {
    log_debug(os, container)("Empty file %s", absolute_path);
    return false;
  }
  size_t len = strlen(line);
  assert(len <= buf_size - 1, "At most buf_size - 1 bytes can be read");
  if (line[len - 1] == '\n') {
    line[len - 1] = '\0'; // trim trailing new line
  }
  return true;
}

bool CgroupController::read_number(const char* filename, julong* result) {
  char buf[1024];
  bool is_ok = read_string(filename, buf, 1024);
  if (!is_ok) {
    return false;
  }
  int matched = sscanf(buf, JULONG_FORMAT, result);
  if (matched == 1) {
    return true;
  }
  return false;
}

bool CgroupController::read_number_handle_max(const char* filename, jlong* result) {
  char buf[1024];
  bool is_ok = read_string(filename, buf, 1024);
  if (!is_ok) {
    return false;
  }
  jlong val = limit_from_str(buf);
  if (val == OSCONTAINER_ERROR) {
    return false;
  }
  *result = val;
  return true;
}

bool CgroupController::read_numerical_key_value(const char* filename, const char* key, julong* result) {
  assert(key != nullptr, "key must be given");
  assert(result != nullptr, "result pointer must not be null");
  assert(filename != nullptr, "file to search in must be given");
  const char* s_path = subsystem_path();
  if (s_path == nullptr) {
    log_debug(os, container)("read_numerical_key_value: subsystem path is null");
    return false;
  }

  stringStream file_path;
  file_path.print_raw(s_path);
  file_path.print_raw(filename);

  if (file_path.size() > MAXPATHLEN) {
    log_debug(os, container)("File path too long %s, %s", file_path.base(), filename);
    return false;
  }
  const char* absolute_path = file_path.freeze();
  log_trace(os, container)("Path to %s is %s", filename, absolute_path);
  FILE* fp = os::fopen(absolute_path, "r");
  if (fp == nullptr) {
    log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno));
    return false;
  }

  const int buf_len = MAXPATHLEN+1;
  char buf[buf_len];
  char* line = fgets(buf, buf_len, fp);
  bool found_match = false;
  // File consists of multiple lines in a "key value"
  // fashion, we have to find the key.
  const size_t key_len = strlen(key);
  for (; line != nullptr; line = fgets(buf, buf_len, fp)) {
    char after_key = line[key_len];
    if (strncmp(line, key, key_len) == 0
          && isspace((unsigned char) after_key) != 0
          && after_key != '\n') {
      // Skip key, skip space
      const char* value_substr = line + key_len + 1;
      int matched = sscanf(value_substr, JULONG_FORMAT, result);
      found_match = matched == 1;
      if (found_match) {
        break;
      }
    }
  }
  fclose(fp);
  if (found_match) {
    return true;
  }
  log_debug(os, container)("Type %s (key == %s) not found in file %s", JULONG_FORMAT,
                           key, absolute_path);
  return false;
}

bool CgroupController::read_numerical_tuple_value(const char* filename, bool use_first, jlong* result) {
  char buf[1024];
  bool is_ok = read_string(filename, buf, 1024);
  if (!is_ok) {
    return false;
  }
  char token[1024];
  const int matched = sscanf(buf, (use_first ? "%1023s %*s" : "%*s %1023s"), token);
  if (matched != 1) {
    return false;
  }
  jlong val = limit_from_str(token);
  if (val == OSCONTAINER_ERROR) {
    return false;
  }
  *result = val;
  return true;
}

jlong CgroupController::limit_from_str(char* limit_str) {
  if (limit_str == nullptr) {
    return OSCONTAINER_ERROR;
  }
  // Unlimited memory in cgroups is the literal string 'max' for
  // some controllers, for example the pids controller.
  if (strcmp("max", limit_str) == 0) {
    return (jlong)-1;
  }
  julong limit;
  if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) {
    return OSCONTAINER_ERROR;
  }
  return (jlong)limit;
}

// CgroupSubsystem implementations

jlong CgroupSubsystem::memory_and_swap_limit_in_bytes() {
  julong phys_mem = os::Linux::physical_memory();
  julong host_swap = os::Linux::host_swap();
  return memory_controller()->controller()->memory_and_swap_limit_in_bytes(phys_mem, host_swap);
}

jlong CgroupSubsystem::memory_and_swap_usage_in_bytes() {
  julong phys_mem = os::Linux::physical_memory();
  julong host_swap = os::Linux::host_swap();
  return memory_controller()->controller()->memory_and_swap_usage_in_bytes(phys_mem, host_swap);
}

jlong CgroupSubsystem::memory_soft_limit_in_bytes() {
  julong phys_mem = os::Linux::physical_memory();
  return memory_controller()->controller()->memory_soft_limit_in_bytes(phys_mem);
}

jlong CgroupSubsystem::memory_throttle_limit_in_bytes() {
  return memory_controller()->controller()->memory_throttle_limit_in_bytes();
}

jlong CgroupSubsystem::memory_usage_in_bytes() {
  return memory_controller()->controller()->memory_usage_in_bytes();
}

jlong CgroupSubsystem::memory_max_usage_in_bytes() {
  return memory_controller()->controller()->memory_max_usage_in_bytes();
}

jlong CgroupSubsystem::rss_usage_in_bytes() {
  return memory_controller()->controller()->rss_usage_in_bytes();
}

jlong CgroupSubsystem::cache_usage_in_bytes() {
  return memory_controller()->controller()->cache_usage_in_bytes();
}

int CgroupSubsystem::cpu_quota() {
  return cpu_controller()->controller()->cpu_quota();
}

int CgroupSubsystem::cpu_period() {
  return cpu_controller()->controller()->cpu_period();
}

int CgroupSubsystem::cpu_shares() {
  return cpu_controller()->controller()->cpu_shares();
}

jlong CgroupSubsystem::cpu_usage_in_micros() {
  return cpuacct_controller()->cpu_usage_in_micros();
}

void CgroupSubsystem::print_version_specific_info(outputStream* st) {
  julong phys_mem = os::Linux::physical_memory();
  memory_controller()->controller()->print_version_specific_info(st, phys_mem);
}
