6684395: Port NUMA-aware allocator to linux

NUMA-aware allocator port to Linux Reviewed-by: jmasa, apetrusenko
2025-08-26 22:34:27 +02:00 · 2008-04-29 13:51:26 +04:00 · 2008-04-29 13:51:26 +04:00 · a24f915b60
commit a24f915b60
parent f784be24d1
14 changed files with 260 additions and 73 deletions
--- a/hotspot/build/linux/makefiles/mapfile-vers-debug
+++ b/hotspot/build/linux/makefiles/mapfile-vers-debug
@ -273,6 +273,8 @@ SUNWprivate_1.1 {
                jio_vfprintf;
                jio_vsnprintf;
                fork1;
                numa_warn;
                numa_error;
                # Needed because there is no JVM interface for this.
                sysThreadAvailableStackWithSlack;
--- a/hotspot/build/linux/makefiles/mapfile-vers-product
+++ b/hotspot/build/linux/makefiles/mapfile-vers-product
@ -268,6 +268,8 @@ SUNWprivate_1.1 {
                jio_vfprintf;
                jio_vsnprintf;
                fork1;
                numa_warn;
                numa_error;
                # Needed because there is no JVM interface for this.
                sysThreadAvailableStackWithSlack;
--- a/hotspot/src/os/linux/vm/os_linux.cpp
+++ b/hotspot/src/os/linux/vm/os_linux.cpp
@ -2228,20 +2228,42 @@ bool os::commit_memory(char* addr, size_t size, size_t alignment_hint) {
 }
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
-void os::free_memory(char *addr, size_t bytes)         { }
+
 // Gives the pages in [addr, addr + bytes) back by delegating to
 // uncommit_memory(). The bool result of uncommit_memory() is discarded.
 void os::free_memory(char *addr, size_t bytes) {
  uncommit_memory(addr, bytes);
 }
 // Intentionally empty on Linux in this version: no action is taken for the range.
 void os::numa_make_global(char *addr, size_t bytes)    { }
-void os::numa_make_local(char *addr, size_t bytes)     { }
+
 // Asks libnuma to place the range [addr, addr + bytes) on node lgrp_hint via
 // Linux::numa_tonode_memory(). The wrapper's result (-1 when libnuma was not
 // loaded) is ignored, so this is a best-effort request.
 void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
  Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
 }
 // Always reports "no change": topology changes are not detected on Linux here.
 bool os::numa_topology_changed()   { return false; }
-size_t os::numa_get_groups_num()                       { return 1; }
+
-int os::numa_get_group_id()                            { return 0; }
+size_t os::numa_get_groups_num() {
-size_t os::numa_get_leaf_groups(int *ids, size_t size) {
+  int max_node = Linux::numa_max_node();
-  if (size > 0) {
+  return max_node > 0 ? max_node + 1 : 1;
-    ids[0] = 0;
+}
-    return 1;
+
 // Returns the NUMA node (lgrp) the calling thread is currently running on.
 // Falls back to node 0 when either the current CPU or its node is unknown.
 int os::numa_get_group_id() {
  const int current_cpu = Linux::sched_getcpu();
  if (current_cpu == -1) {
    // sched_getcpu() is unavailable or failed.
    return 0;
  }
  const int node = Linux::get_node_by_cpu(current_cpu);
  // -1 means the cpu -> node table has no entry for this CPU.
  return (node != -1) ? node : 0;
 }
 // Fills ids[0 .. size-1] with the consecutive group ids 0 .. size-1 and
 // returns the number of entries written (always 'size').
 size_t os::numa_get_leaf_groups(int *ids, size_t size) {
  size_t filled = 0;
  while (filled < size) {
    ids[filled] = filled;
    filled++;
  }
  return filled;
 }
 // Stub on Linux in this version: always returns false and never writes
 // to 'info'.
 bool os::get_page_info(char *start, page_info* info) {
  return false;
 }
@ -2250,6 +2272,74 @@ char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info
  return end;
 }
 // Empty C-linkage definitions of numa_warn() and numa_error(); both symbols
 // are also listed in the linux mapfiles.
 // NOTE(review): presumably these stand in for libnuma's own reporting hooks so
 // the library stays silent inside the VM -- confirm against the libnuma docs.
 extern "C" void numa_warn(int number, char *where, ...) { }
 extern "C" void numa_error(char *where) { }
 // Resolves sched_getcpu() and the libnuma entry points at run time, so the VM
 // has no link-time dependency on libnuma. When everything is available and
 // numa_available() succeeds, the cpu -> node table is allocated and filled.
 // If any step fails the function pointers stay NULL and the inline wrappers
 // in os::Linux report -1.
 void os::Linux::libnuma_init() {
  // sched_getcpu() should be in libc.
  set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
                                  dlsym(RTLD_DEFAULT, "sched_getcpu")));

  if (sched_getcpu() != -1) { // Does it work?
    // Prefer the versioned runtime name: the unversioned "libnuma.so" is
    // typically a symlink shipped only with the development package, so
    // relying on it alone would silently disable NUMA support on most
    // installations. Keep the old name as a fallback.
    void *handle = dlopen("libnuma.so.1", RTLD_LAZY);
    if (handle == NULL) {
      handle = dlopen("libnuma.so", RTLD_LAZY);
    }
    if (handle != NULL) {
      set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
                                           dlsym(handle, "numa_node_to_cpus")));
      set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
                                       dlsym(handle, "numa_max_node")));
      set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
                                        dlsym(handle, "numa_available")));
      set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
                                            dlsym(handle, "numa_tonode_memory")));

      // The handle is deliberately never dlclose()d: the resolved function
      // pointers are used for the rest of the VM's lifetime.
      if (numa_available() != -1) {
        // Create a cpu -> node mapping
        _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
        rebuild_cpu_to_node_map();
      }
    }
  }
 }
 // rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
 // The table is later used in get_node_by_cpu().
 //
 // For every node, libnuma is asked for the bitmask of CPUs belonging to it;
 // each set bit records that node as the owner of the corresponding CPU.
 void os::Linux::rebuild_cpu_to_node_map() {
  // numa_node_to_cpus() fills an array of C 'unsigned long' words, so the word
  // width has to be that of the C type. The VM-wide BitsPerLong constant is not
  // guaranteed to match it (e.g. on 32-bit builds), which would undersize the
  // buffer and shift 1UL by more than its width.
  const int bits_per_word = (int)(sizeof(unsigned long) * 8);

  int cpu_num = os::active_processor_count();
  cpu_to_node()->clear();
  cpu_to_node()->at_grow(cpu_num - 1);
  int node_num = numa_get_groups_num();
  int cpu_map_size = (cpu_num + bits_per_word - 1) / bits_per_word;
  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
  for (int i = 0; i < node_num; i++) {
    if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
      for (int j = 0; j < cpu_map_size; j++) {
        if (cpu_map[j] != 0) {
          for (int k = 0; k < bits_per_word; k++) {
            if ((cpu_map[j] & (1UL << k)) != 0) {
              int cpu_id = j * bits_per_word + k;
              // The bitmap is rounded up to whole words, so it can name CPUs
              // beyond the table; skip those instead of writing out of bounds.
              if (cpu_id < cpu_to_node()->length()) {
                cpu_to_node()->at_put(cpu_id, i);
              }
            }
          }
        }
      }
    }
  }
  FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
 }
 int os::Linux::get_node_by_cpu(int cpu_id) {
  if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
    return cpu_to_node()->at(cpu_id);
  }
  return -1;
 }
 // Definitions of the static os::Linux members used for NUMA support: the
 // cpu -> node table and the function pointers resolved in libnuma_init().
 // Having static storage duration and no initializer, they start out NULL,
 // which is what the inline wrappers and get_node_by_cpu() test for.
 GrowableArray<int>* os::Linux::_cpu_to_node;
 os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
 os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
 os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
 os::Linux::numa_available_func_t os::Linux::_numa_available;
 os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
 bool os::uncommit_memory(char* addr, size_t size) {
  return ::mmap(addr, size,
                PROT_READ|PROT_WRITE|PROT_EXEC,
@ -3552,6 +3642,10 @@ jint os::init_2(void)
          Linux::is_floating_stack() ? "floating stack" : "fixed stack");
  }
  if (UseNUMA) {
    Linux::libnuma_init();
  }
  if (MaxFDLimit) {
    // set the number of file descriptors to max. print out error
    // if getrlimit/setrlimit fails but continue regardless.
--- a/hotspot/src/os/linux/vm/os_linux.hpp
+++ b/hotspot/src/os/linux/vm/os_linux.hpp
@ -59,6 +59,8 @@ class Linux {
  static bool _is_NPTL;
  static bool _supports_fast_thread_cpu_time;
  static GrowableArray<int>* _cpu_to_node;
 protected:
  static julong _physical_memory;
@ -79,8 +81,9 @@ class Linux {
  static void set_is_LinuxThreads()           { _is_NPTL = false; }
  static void set_is_floating_stack()         { _is_floating_stack = true; }
  static void rebuild_cpu_to_node_map();
  static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
 public:
  static void init_thread_fpu_state();
  static int  get_fpu_control_word();
  static void set_fpu_control_word(int fpu_control);
@ -143,6 +146,7 @@ class Linux {
  static bool is_floating_stack()             { return _is_floating_stack; }
  static void libpthread_init();
  static void libnuma_init();
  // Minimum stack size a thread can be created with (allowing
  // the VM to completely create the thread and enter user code)
@ -229,6 +233,38 @@ class Linux {
    #undef SR_SUSPENDED
  };
 private:
  // Signatures of the entry points that are looked up with dlsym() in
  // libnuma_init() instead of being linked against directly.
  typedef int (*sched_getcpu_func_t)(void);
  typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
  typedef int (*numa_max_node_func_t)(void);
  typedef int (*numa_available_func_t)(void);
  typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
  // Resolved function pointers; NULL until (and unless) libnuma_init() finds
  // the corresponding symbol.
  static sched_getcpu_func_t _sched_getcpu;
  static numa_node_to_cpus_func_t _numa_node_to_cpus;
  static numa_max_node_func_t _numa_max_node;
  static numa_available_func_t _numa_available;
  static numa_tonode_memory_func_t _numa_tonode_memory;
  // Setters used by libnuma_init() to store the dlsym() results.
  static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
  static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
  static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
  static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
  static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
 public:
  // NULL-safe wrappers around the dynamically resolved functions. Each one
  // forwards to the function pointer when it has been set and returns -1
  // otherwise, so callers never have to check whether libnuma was loaded.
  static int sched_getcpu()  { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
  static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
    return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
  }
  static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
  static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
  static int numa_tonode_memory(void *start, size_t size, int node) {
    return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
  }
  // Returns the node recorded for cpu_id in the cpu -> node table, or -1 if
  // the table is missing or cpu_id is out of range (see os_linux.cpp).
  static int get_node_by_cpu(int cpu_id);
 };
--- a/hotspot/src/os/linux/vm/os_linux.inline.hpp
+++ b/hotspot/src/os/linux/vm/os_linux.inline.hpp
@ -120,3 +120,6 @@ inline int os::closedir(DIR *dirp)
  RESTARTABLE(_cmd, _result); \
  return _result; \
 } while(false)
 // Linux: memory can be bound to a node statically (see numa_make_local()),
 // and threads have no home node, so MutableNUMASpace re-queries the current
 // node on every allocation.
 inline bool os::numa_has_static_binding()   { return true; }
 inline bool os::numa_has_group_homing()     { return false;  }
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp
@ -2602,7 +2602,7 @@ void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
 }
 // Tell the OS to make the range local to the first-touching LWP
-void os::numa_make_local(char *addr, size_t bytes) {
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
  assert((intptr_t)addr % os::vm_page_size() == 0, "Address should be page-aligned.");
  if (madvise(addr, bytes, MADV_ACCESS_LWP) < 0) {
    debug_only(warning("MADV_ACCESS_LWP failed."));
--- a/hotspot/src/os/solaris/vm/os_solaris.inline.hpp
+++ b/hotspot/src/os/solaris/vm/os_solaris.inline.hpp
@ -204,3 +204,6 @@ do { \
  RESTARTABLE(_cmd, _result); \
  return _result; \
 } while(false)
 // Solaris: there is no static memory binding (pages go to the first-touching
 // LWP's group), and group homing is reported as available.
 inline bool os::numa_has_static_binding()   { return false; }
 inline bool os::numa_has_group_homing()     { return true;  }
--- a/hotspot/src/os/windows/vm/os_windows.cpp
+++ b/hotspot/src/os/windows/vm/os_windows.cpp
@ -2581,7 +2581,7 @@ bool os::unguard_memory(char* addr, size_t bytes) {
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
 void os::free_memory(char *addr, size_t bytes)         { }
 void os::numa_make_global(char *addr, size_t bytes)    { }
-void os::numa_make_local(char *addr, size_t bytes)     { }
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint)    { }
 bool os::numa_topology_changed()                       { return false; }
 size_t os::numa_get_groups_num()                       { return 1; }
 int os::numa_get_group_id()                            { return 0; }
--- a/hotspot/src/os/windows/vm/os_windows.inline.hpp
+++ b/hotspot/src/os/windows/vm/os_windows.inline.hpp
@ -69,3 +69,6 @@ inline void os::bang_stack_shadow_pages() {
    *((int *)(sp - (pages * vm_page_size()))) = 0;
  }
 }
 inline bool os::numa_has_static_binding()   { return true;   }
 inline bool os::numa_has_group_homing()     { return false;  }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
@ -169,8 +169,9 @@ class ParallelScavengeHeap : public CollectedHeap {
  size_t large_typearray_limit() { return FastAllocateSizeLimit; }
  bool supports_inline_contig_alloc() const { return !UseNUMA; }
-  HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : NULL; }
+
-  HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : NULL; }
+  HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : (HeapWord**)-1; }
  HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : (HeapWord**)-1; }
  void ensure_parsability(bool retire_tlabs);
  void accumulate_statistics_all_tlabs();
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
@ -46,10 +46,12 @@ void MutableNUMASpace::mangle_unused_area() {
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    MutableSpace *s = ls->space();
    if (!os::numa_has_static_binding()) {
      HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
      if (top < s->end()) {
        ls->add_invalid_region(MemRegion(top, s->end()));
      }
    }
    s->mangle_unused_area();
  }
 }
@ -70,6 +72,7 @@ void MutableNUMASpace::ensure_parsability() {
                                    area_touched_words);
        }
 #endif
        if (!os::numa_has_static_binding()) {
          MemRegion invalid;
          HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
          HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
@ -84,9 +87,11 @@ void MutableNUMASpace::ensure_parsability() {
          }
          ls->add_invalid_region(invalid);
        }
        s->set_top(s->end());
      }
    } else {
      if (!os::numa_has_static_binding()) {
 #ifdef ASSERT
        MemRegion invalid(s->top(), s->end());
        ls->add_invalid_region(invalid);
@ -98,6 +103,7 @@ void MutableNUMASpace::ensure_parsability() {
 #endif
      }
    }
  }
 }
 size_t MutableNUMASpace::used_in_words() const {
@ -194,7 +200,7 @@ bool MutableNUMASpace::update_layout(bool force) {
 }
 // Bias region towards the first-touching lgrp. Set the right page sizes.
-void MutableNUMASpace::bias_region(MemRegion mr) {
+void MutableNUMASpace::bias_region(MemRegion mr, int lgrp_id) {
  HeapWord *start = (HeapWord*)round_to((intptr_t)mr.start(), page_size());
  HeapWord *end = (HeapWord*)round_down((intptr_t)mr.end(), page_size());
  if (end > start) {
@ -202,9 +208,13 @@ void MutableNUMASpace::bias_region(MemRegion mr) {
    assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
           (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
    assert(region().contains(aligned_region), "Sanity");
-    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+    // First we tell the OS which page size we want in the given range. The underlying
    // large page can be broken down if we require small pages.
    os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
-    os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size());
+    // Then we uncommit the pages in the range.
    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
    // And make them local/first-touch biased.
    os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
  }
 }
@ -233,12 +243,14 @@ void MutableNUMASpace::update() {
    initialize(region(), true);
  } else {
    bool should_initialize = false;
    if (!os::numa_has_static_binding()) {
      for (int i = 0; i < lgrp_spaces()->length(); i++) {
        if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
          should_initialize = true;
          break;
        }
      }
    }
    if (should_initialize ||
        (UseAdaptiveNUMAChunkSizing && adaptation_cycles() < samples_count())) {
@ -472,8 +484,8 @@ void MutableNUMASpace::initialize(MemRegion mr, bool clear_space) {
      intersection = MemRegion(new_region.start(), new_region.start());
    }
    select_tails(new_region, intersection, &bottom_region, &top_region);
-    bias_region(bottom_region);
+    bias_region(bottom_region, lgrp_spaces()->at(0)->lgrp_id());
-    bias_region(top_region);
+    bias_region(top_region, lgrp_spaces()->at(lgrp_spaces()->length() - 1)->lgrp_id());
  }
  // Check if the space layout has changed significantly?
@ -545,22 +557,37 @@ void MutableNUMASpace::initialize(MemRegion mr, bool clear_space) {
      intersection = MemRegion(new_region.start(), new_region.start());
    }
    if (!os::numa_has_static_binding()) {
      MemRegion invalid_region = ls->invalid_region().intersection(new_region);
      // Invalid region is a range of memory that could've possibly
      // been allocated on the other node. That's relevant only on Solaris where
      // there is no static memory binding.
      if (!invalid_region.is_empty()) {
        merge_regions(new_region, &intersection, &invalid_region);
        free_region(invalid_region);
        ls->set_invalid_region(MemRegion());
      }
    }
    select_tails(new_region, intersection, &bottom_region, &top_region);
    if (!os::numa_has_static_binding()) {
      // If that's a system with the first-touch policy then it's enough
      // to free the pages.
      free_region(bottom_region);
      free_region(top_region);
    } else {
      // In a system with static binding we have to change the bias whenever
      // we reshape the heap.
      bias_region(bottom_region, ls->lgrp_id());
      bias_region(top_region, ls->lgrp_id());
    }
    // If we clear the region, we would mangle it in debug. That would cause page
    // allocation in a different place. Hence setting the top directly.
    s->initialize(new_region, false);
    s->set_top(s->bottom());
    ls->set_invalid_region(MemRegion());
    set_adaptation_cycles(samples_count());
  }
 }
@ -575,7 +602,7 @@ void MutableNUMASpace::set_top(HeapWord* value) {
    HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
    if (s->contains(value)) {
-      if (top < value && top < s->end()) {
+      if (!os::numa_has_static_binding() && top < value && top < s->end()) {
        ls->add_invalid_region(MemRegion(top, value));
      }
      s->set_top(value);
@ -584,7 +611,7 @@ void MutableNUMASpace::set_top(HeapWord* value) {
        if (found_top) {
            s->set_top(s->bottom());
        } else {
-            if (top < s->end()) {
+          if (!os::numa_has_static_binding() && top < s->end()) {
            ls->add_invalid_region(MemRegion(top, s->end()));
          }
          s->set_top(s->end());
@ -601,11 +628,23 @@ void MutableNUMASpace::clear() {
  }
 }
 /*
   Linux supports static memory binding, therefore most of the
   logic dealing with possible invalid page allocation is effectively
   disabled. Besides, there is no notion of a home node in Linux: a
   thread is allowed to migrate freely, although the scheduler is rather
   reluctant to move threads between nodes. We check the current
   node on every allocation, and with high probability a thread stays on
   the same node for some time, allowing local access to recently allocated
   objects.
 */
 HeapWord* MutableNUMASpace::allocate(size_t size) {
-  int lgrp_id = Thread::current()->lgrp_id();
+  Thread* thr = Thread::current();
-  if (lgrp_id == -1) {
+  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
    lgrp_id = os::numa_get_group_id();
-    Thread::current()->set_lgrp_id(lgrp_id);
+    thr->set_lgrp_id(lgrp_id);
  }
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@ -628,22 +667,22 @@ HeapWord* MutableNUMASpace::allocate(size_t size) {
      MutableSpace::set_top(s->top());
    }
  }
-  // Make the page allocation happen here.
+  // Make the page allocation happen here if there is no static binding.
-  if (p != NULL) {
+  if (p != NULL && !os::numa_has_static_binding()) {
    for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
      *(int*)i = 0;
    }
  }
  return p;
 }
 // This version is lock-free.
 HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
-  int lgrp_id = Thread::current()->lgrp_id();
+  Thread* thr = Thread::current();
-  if (lgrp_id == -1) {
+  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
    lgrp_id = os::numa_get_group_id();
-    Thread::current()->set_lgrp_id(lgrp_id);
+    thr->set_lgrp_id(lgrp_id);
  }
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@ -670,8 +709,8 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
    }
  }
-  // Make the page allocation happen here.
+  // Make the page allocation happen here if there is no static binding.
-  if (p != NULL) {
+  if (p != NULL && !os::numa_has_static_binding() ) {
    for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
      *(int*)i = 0;
    }
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp
@ -139,8 +139,8 @@ class MutableNUMASpace : public MutableSpace {
  // Check if the NUMA topology has changed. Add and remove spaces if needed.
  // The update can be forced by setting the force parameter equal to true.
  bool update_layout(bool force);
-  // Bias region towards the first-touching lgrp.
+  // Bias region towards the lgrp.
-  void bias_region(MemRegion mr);
+  void bias_region(MemRegion mr, int lgrp_id);
  // Free pages in a given region.
  void free_region(MemRegion mr);
  // Get current chunk size.
--- a/hotspot/src/share/vm/includeDB_core
+++ b/hotspot/src/share/vm/includeDB_core
@ -3181,6 +3181,7 @@ os_<os_family>.cpp                      events.hpp
 os_<os_family>.cpp                      extendedPC.hpp
 os_<os_family>.cpp                      filemap.hpp
 os_<os_family>.cpp                      globals.hpp
 os_<os_family>.cpp                      growableArray.hpp
 os_<os_family>.cpp                      hpi.hpp
 os_<os_family>.cpp                      icBuffer.hpp
 os_<os_family>.cpp                      interfaceSupport.hpp
--- a/hotspot/src/share/vm/runtime/os.hpp
+++ b/hotspot/src/share/vm/runtime/os.hpp
@ -33,6 +33,7 @@ class JavaThread;
 class Event;
 class DLL;
 class FileHandle;
 template<class E> class GrowableArray;
 // %%%%% Moved ThreadState, START_FN, OSThread to new osThread.hpp. -- Rose
@ -206,7 +207,9 @@ class os: AllStatic {
  static void   realign_memory(char *addr, size_t bytes, size_t alignment_hint);
  // NUMA-specific interface
-  static void   numa_make_local(char *addr, size_t bytes);
+  static bool   numa_has_static_binding();
  static bool   numa_has_group_homing();
  static void   numa_make_local(char *addr, size_t bytes, int lgrp_hint);
  static void   numa_make_global(char *addr, size_t bytes);
  static size_t numa_get_groups_num();
  static size_t numa_get_leaf_groups(int *ids, size_t size);