Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * pg_numa.c 4 : * Basic NUMA portability routines 5 : * 6 : * 7 : * Copyright (c) 2025, PostgreSQL Global Development Group 8 : * 9 : * 10 : * IDENTIFICATION 11 : * src/port/pg_numa.c 12 : * 13 : *------------------------------------------------------------------------- 14 : */ 15 : 16 : #include "c.h" 17 : #include <unistd.h> 18 : 19 : #include "miscadmin.h" 20 : #include "port/pg_numa.h" 21 : 22 : /* 23 : * At this point we provide support only for Linux thanks to libnuma, but in 24 : * future support for other platforms e.g. Win32 or FreeBSD might be possible 25 : * too. For Win32 NUMA APIs see 26 : * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support 27 : */ 28 : #ifdef USE_LIBNUMA 29 : 30 : #include <numa.h> 31 : #include <numaif.h> 32 : 33 : /* 34 : * numa_move_pages() chunk size, has to be <= 16 to work around a kernel bug 35 : * in do_pages_stat() (chunked by DO_PAGES_STAT_CHUNK_NR). By using the same 36 : * chunk size, we make it work even on unfixed kernels. 37 : * 38 : * 64-bit system are not affected by the bug, and so use much larger chunks. 39 : */ 40 : #if SIZEOF_SIZE_T == 4 41 : #define NUMA_QUERY_CHUNK_SIZE 16 42 : #else 43 : #define NUMA_QUERY_CHUNK_SIZE 1024 44 : #endif 45 : 46 : /* libnuma requires initialization as per numa(3) on Linux */ 47 : int 48 : pg_numa_init(void) 49 : { 50 : int r = numa_available(); 51 : 52 : return r; 53 : } 54 : 55 : /* 56 : * We use move_pages(2) syscall here - instead of get_mempolicy(2) - as the 57 : * first one allows us to batch and query about many memory pages in one single 58 : * giant system call that is way faster. 59 : * 60 : * We call numa_move_pages() for smaller chunks of the whole array. The first 61 : * reason is to work around a kernel bug, but also to allow interrupting the 62 : * query between the calls (for many pointers processing the whole array can 63 : * take a lot of time). 64 : */ 65 : int 66 : pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status) 67 : { 68 : unsigned long next = 0; 69 : int ret = 0; 70 : 71 : /* 72 : * Chunk pointers passed to numa_move_pages to NUMA_QUERY_CHUNK_SIZE 73 : * items, to work around a kernel bug in do_pages_stat(). 74 : */ 75 : while (next < count) 76 : { 77 : unsigned long count_chunk = Min(count - next, 78 : NUMA_QUERY_CHUNK_SIZE); 79 : 80 : CHECK_FOR_INTERRUPTS(); 81 : 82 : /* 83 : * Bail out if any of the chunks errors out (ret<0). We ignore (ret>0) 84 : * which is used to return number of nonmigrated pages, but we're not 85 : * migrating any pages here. 86 : */ 87 : ret = numa_move_pages(pid, count_chunk, &pages[next], NULL, &status[next], 0); 88 : if (ret < 0) 89 : { 90 : /* plain error, return as is */ 91 : return ret; 92 : } 93 : 94 : next += count_chunk; 95 : } 96 : 97 : /* should have consumed the input array exactly */ 98 : Assert(next == count); 99 : 100 : return 0; 101 : } 102 : 103 : int 104 : pg_numa_get_max_node(void) 105 : { 106 : return numa_max_node(); 107 : } 108 : 109 : #else 110 : 111 : /* Empty wrappers */ 112 : int 113 14 : pg_numa_init(void) 114 : { 115 : /* We state that NUMA is not available */ 116 14 : return -1; 117 : } 118 : 119 : int 120 0 : pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status) 121 : { 122 0 : return 0; 123 : } 124 : 125 : int 126 0 : pg_numa_get_max_node(void) 127 : { 128 0 : return 0; 129 : } 130 : 131 : #endif