From: Steven Noonan Date: Thu, 12 Jan 2017 01:13:05 +0000 (-0800) Subject: add support for tracking NUMA locality metrics X-Git-Tag: v1.5.0~13^2 X-Git-Url: https://arthur.barton.de/gitweb/?a=commitdiff_plain;ds=sidebyside;h=61b0832e4bc7ea2867245bfd2ebf006f02da6dd7;p=netdata.git add support for tracking NUMA locality metrics Note that the 'hit' and 'miss' metrics are not used, because they are sums of other metrics in the same file. Signed-off-by: Steven Noonan --- diff --git a/README.md b/README.md index 81b1c146..a6e3b731 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ This is a list of what it currently monitors: usage, interrupts, softirqs, frequency, total and per core - **Memory**
- RAM, swap and kernel memory usage, including KSM the kernel memory deduper + RAM, swap and kernel memory usage, KSM (Kernel Samepage Merging), NUMA - **Disks**
per disk: I/O, operations, backlog, utilization, space
diff --git a/src/Makefile.am b/src/Makefile.am
index b5c83883..314f600f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -52,6 +52,7 @@ netdata_SOURCES = \
 	simple_pattern.c simple_pattern.h \
 	sys_fs_cgroup.c \
 	sys_devices_system_edac_mc.c \
+	sys_devices_system_node.c \
 	procfile.c procfile.h \
 	proc_self_mountinfo.c proc_self_mountinfo.h \
 	registry.c registry.h \
diff --git a/src/plugin_proc.c b/src/plugin_proc.c
index bf0e6fb0..9b66b7c2 100644
--- a/src/plugin_proc.c
+++ b/src/plugin_proc.c
@@ -29,6 +29,7 @@ static struct proc_module {
         { .name = "/proc/meminfo", .dim = "meminfo", .func = do_proc_meminfo },
         { .name = "/sys/kernel/mm/ksm", .dim = "ksm", .func = do_sys_kernel_mm_ksm },
         { .name = "/sys/devices/system/edac/mc", .dim = "ecc", .func = do_proc_sys_devices_system_edac_mc },
+        { .name = "/sys/devices/system/node", .dim = "numa", .func = do_proc_sys_devices_system_node },

         // network metrics
         { .name = "/proc/net/dev", .dim = "netdev", .func = do_proc_net_dev },
@@ -151,3 +152,50 @@ void *proc_main(void *ptr) {
     pthread_exit(NULL);
     return NULL;
 }
+
+/*
+ * Count the "node" + digit sub-directories of the NUMA sysfs directory.
+ *
+ * The directory path is whatever config_get() returns for section
+ * "plugin:proc:/sys/devices/system/node", key "directory to monitor", with
+ * global_host_prefix + "/sys/devices/system/node" passed in as the default.
+ *
+ * The scan runs on the first call only: the result is kept in a
+ * function-local static and returned unchanged afterwards. When the
+ * directory cannot be opened the cached result is 0.
+ */
+int get_numa_node_count(void)
+{
+    static int numa_node_count = -1;
+
+    if (numa_node_count != -1)
+        return numa_node_count;
+
+    numa_node_count = 0;
+
+    char name[FILENAME_MAX + 1];
+    snprintfz(name, FILENAME_MAX, "%s%s", global_host_prefix, "/sys/devices/system/node");
+    char *dirname = config_get("plugin:proc:/sys/devices/system/node", "directory to monitor", name);
+
+    DIR *dir = opendir(dirname);
+    if(dir) {
+        struct dirent *de = NULL;
+        while((de = readdir(dir))) {
+            if(de->d_type != DT_DIR)
+                continue;
+
+            if(strncmp(de->d_name, "node", 4) != 0)
+                continue;
+
+            // <ctype.h> functions take an int that must be representable as
+            // unsigned char (or EOF); a plain char may be negative.
+            if(!isdigit((unsigned char)de->d_name[4]))
+                continue;
+
+            numa_node_count++;
+        }
+        closedir(dir);
+    }
+
+    return numa_node_count;
+}
diff --git a/src/plugin_proc.h b/src/plugin_proc.h
index 2ee2b6b2..5dee7853 100644
--- a/src/plugin_proc.h
+++ b/src/plugin_proc.h
@@ -24,5 +24,8 @@ extern int
do_proc_net_stat_synproxy(int update_every, usec_t dt); extern int do_proc_net_softnet_stat(int update_every, usec_t dt); extern int do_proc_uptime(int update_every, usec_t dt); extern int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt); +extern int do_proc_sys_devices_system_node(int update_every, usec_t dt); + +extern int get_numa_node_count(void); #endif /* NETDATA_PLUGIN_PROC_H */ diff --git a/src/proc_vmstat.c b/src/proc_vmstat.c index 2f5666cf..3e79986a 100644 --- a/src/proc_vmstat.c +++ b/src/proc_vmstat.c @@ -4,7 +4,8 @@ int do_proc_vmstat(int update_every, usec_t dt) { (void)dt; static procfile *ff = NULL; - static int do_swapio = -1, do_io = -1, do_pgfaults = -1; + static int do_swapio = -1, do_io = -1, do_pgfaults = -1, do_numa = -1; + static int has_numa = -1; // static uint32_t hash_allocstall_dma = 0; // static uint32_t hash_allocstall_dma32 = 0; @@ -67,17 +68,17 @@ int do_proc_vmstat(int update_every, usec_t dt) { // static uint32_t hash_nr_zone_unevictable = 0; // static uint32_t hash_nr_zone_write_pending = 0; // static uint32_t hash_nr_zspages = 0; - // static uint32_t hash_numa_foreign = 0; - // static uint32_t hash_numa_hint_faults = 0; - // static uint32_t hash_numa_hint_faults_local = 0; - // static uint32_t hash_numa_hit = 0; - // static uint32_t hash_numa_huge_pte_updates = 0; - // static uint32_t hash_numa_interleave = 0; - // static uint32_t hash_numa_local = 0; - // static uint32_t hash_numa_miss = 0; - // static uint32_t hash_numa_other = 0; - // static uint32_t hash_numa_pages_migrated = 0; - // static uint32_t hash_numa_pte_updates = 0; + static uint32_t hash_numa_foreign = 0; + static uint32_t hash_numa_hint_faults = 0; + static uint32_t hash_numa_hint_faults_local = 0; + //static uint32_t hash_numa_hit = 0; + static uint32_t hash_numa_huge_pte_updates = 0; + static uint32_t hash_numa_interleave = 0; + static uint32_t hash_numa_local = 0; + //static uint32_t hash_numa_miss = 0; + static uint32_t hash_numa_other = 0; + 
static uint32_t hash_numa_pages_migrated = 0; + static uint32_t hash_numa_pte_updates = 0; // static uint32_t hash_pageoutrun = 0; // static uint32_t hash_pgactivate = 0; // static uint32_t hash_pgalloc_dma = 0; @@ -136,6 +137,7 @@ int do_proc_vmstat(int update_every, usec_t dt) { do_swapio = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "swap i/o", CONFIG_ONDEMAND_ONDEMAND); do_io = config_get_boolean("plugin:proc:/proc/vmstat", "disk i/o", 1); do_pgfaults = config_get_boolean("plugin:proc:/proc/vmstat", "memory page faults", 1); + do_numa = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "system-wide numa metric summary", CONFIG_ONDEMAND_ONDEMAND); // hash_allocstall_dma32 = simple_hash("allocstall_dma32"); // hash_allocstall_dma = simple_hash("allocstall_dma"); @@ -198,17 +200,17 @@ int do_proc_vmstat(int update_every, usec_t dt) { // hash_nr_zone_unevictable = simple_hash("nr_zone_unevictable"); // hash_nr_zone_write_pending = simple_hash("nr_zone_write_pending"); // hash_nr_zspages = simple_hash("nr_zspages"); - // hash_numa_foreign = simple_hash("numa_foreign"); - // hash_numa_hint_faults_local = simple_hash("numa_hint_faults_local"); - // hash_numa_hint_faults = simple_hash("numa_hint_faults"); - // hash_numa_hit = simple_hash("numa_hit"); - // hash_numa_huge_pte_updates = simple_hash("numa_huge_pte_updates"); - // hash_numa_interleave = simple_hash("numa_interleave"); - // hash_numa_local = simple_hash("numa_local"); - // hash_numa_miss = simple_hash("numa_miss"); - // hash_numa_other = simple_hash("numa_other"); - // hash_numa_pages_migrated = simple_hash("numa_pages_migrated"); - // hash_numa_pte_updates = simple_hash("numa_pte_updates"); + hash_numa_foreign = simple_hash("numa_foreign"); + hash_numa_hint_faults_local = simple_hash("numa_hint_faults_local"); + hash_numa_hint_faults = simple_hash("numa_hint_faults"); + //hash_numa_hit = simple_hash("numa_hit"); + hash_numa_huge_pte_updates = simple_hash("numa_huge_pte_updates"); + 
hash_numa_interleave = simple_hash("numa_interleave"); + hash_numa_local = simple_hash("numa_local"); + //hash_numa_miss = simple_hash("numa_miss"); + hash_numa_other = simple_hash("numa_other"); + hash_numa_pages_migrated = simple_hash("numa_pages_migrated"); + hash_numa_pte_updates = simple_hash("numa_pte_updates"); // hash_pageoutrun = simple_hash("pageoutrun"); // hash_pgactivate = simple_hash("pgactivate"); // hash_pgalloc_dma32 = simple_hash("pgalloc_dma32"); @@ -337,17 +339,17 @@ int do_proc_vmstat(int update_every, usec_t dt) { // unsigned long long nr_zone_unevictable = 0ULL; // unsigned long long nr_zone_write_pending = 0ULL; // unsigned long long nr_zspages = 0ULL; - // unsigned long long numa_foreign = 0ULL; - // unsigned long long numa_hint_faults = 0ULL; - // unsigned long long numa_hint_faults_local = 0ULL; - // unsigned long long numa_hit = 0ULL; - // unsigned long long numa_huge_pte_updates = 0ULL; - // unsigned long long numa_interleave = 0ULL; - // unsigned long long numa_local = 0ULL; - // unsigned long long numa_miss = 0ULL; - // unsigned long long numa_other = 0ULL; - // unsigned long long numa_pages_migrated = 0ULL; - // unsigned long long numa_pte_updates = 0ULL; + unsigned long long numa_foreign = 0ULL; + unsigned long long numa_hint_faults = 0ULL; + unsigned long long numa_hint_faults_local = 0ULL; + //unsigned long long numa_hit = 0ULL; + unsigned long long numa_huge_pte_updates = 0ULL; + unsigned long long numa_interleave = 0ULL; + unsigned long long numa_local = 0ULL; + //unsigned long long numa_miss = 0ULL; + unsigned long long numa_other = 0ULL; + unsigned long long numa_pages_migrated = 0ULL; + unsigned long long numa_pte_updates = 0ULL; // unsigned long long pageoutrun = 0ULL; // unsigned long long pgactivate = 0ULL; // unsigned long long pgalloc_dma = 0ULL; @@ -477,17 +479,17 @@ int do_proc_vmstat(int update_every, usec_t dt) { // else if(unlikely(hash == hash_nr_zone_unevictable && strcmp(name, "nr_zone_unevictable") == 0)) 
nr_zone_unevictable = strtoull(value, NULL, 10); // else if(unlikely(hash == hash_nr_zone_write_pending && strcmp(name, "nr_zone_write_pending") == 0)) nr_zone_write_pending = strtoull(value, NULL, 10); // else if(unlikely(hash == hash_nr_zspages && strcmp(name, "nr_zspages") == 0)) nr_zspages = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_foreign && strcmp(name, "numa_foreign") == 0)) numa_foreign = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_hint_faults_local && strcmp(name, "numa_hint_faults_local") == 0)) numa_hint_faults_local = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_hint_faults && strcmp(name, "numa_hint_faults") == 0)) numa_hint_faults = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_hit && strcmp(name, "numa_hit") == 0)) numa_hit = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_huge_pte_updates && strcmp(name, "numa_huge_pte_updates") == 0)) numa_huge_pte_updates = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_interleave && strcmp(name, "numa_interleave") == 0)) numa_interleave = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_local && strcmp(name, "numa_local") == 0)) numa_local = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_miss && strcmp(name, "numa_miss") == 0)) numa_miss = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_other && strcmp(name, "numa_other") == 0)) numa_other = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_pages_migrated && strcmp(name, "numa_pages_migrated") == 0)) numa_pages_migrated = strtoull(value, NULL, 10); - // else if(unlikely(hash == hash_numa_pte_updates && strcmp(name, "numa_pte_updates") == 0)) numa_pte_updates = strtoull(value, NULL, 10); + else if(unlikely(hash == hash_numa_foreign && strcmp(name, "numa_foreign") == 0)) numa_foreign = strtoull(value, NULL, 10); + else if(unlikely(hash == 
hash_numa_hint_faults_local && strcmp(name, "numa_hint_faults_local") == 0)) numa_hint_faults_local = strtoull(value, NULL, 10); + else if(unlikely(hash == hash_numa_hint_faults && strcmp(name, "numa_hint_faults") == 0)) numa_hint_faults = strtoull(value, NULL, 10); + //else if(unlikely(hash == hash_numa_hit && strcmp(name, "numa_hit") == 0)) numa_hit = strtoull(value, NULL, 10); + else if(unlikely(hash == hash_numa_huge_pte_updates && strcmp(name, "numa_huge_pte_updates") == 0)) numa_huge_pte_updates = strtoull(value, NULL, 10); + else if(unlikely(hash == hash_numa_interleave && strcmp(name, "numa_interleave") == 0)) numa_interleave = strtoull(value, NULL, 10); + else if(unlikely(hash == hash_numa_local && strcmp(name, "numa_local") == 0)) numa_local = strtoull(value, NULL, 10); + //else if(unlikely(hash == hash_numa_miss && strcmp(name, "numa_miss") == 0)) numa_miss = strtoull(value, NULL, 10); + else if(unlikely(hash == hash_numa_other && strcmp(name, "numa_other") == 0)) numa_other = strtoull(value, NULL, 10); + else if(unlikely(hash == hash_numa_pages_migrated && strcmp(name, "numa_pages_migrated") == 0)) numa_pages_migrated = strtoull(value, NULL, 10); + else if(unlikely(hash == hash_numa_pte_updates && strcmp(name, "numa_pte_updates") == 0)) numa_pte_updates = strtoull(value, NULL, 10); // else if(unlikely(hash == hash_pageoutrun && strcmp(name, "pageoutrun") == 0)) pageoutrun = strtoull(value, NULL, 10); // else if(unlikely(hash == hash_pgactivate && strcmp(name, "pgactivate") == 0)) pgactivate = strtoull(value, NULL, 10); // else if(unlikely(hash == hash_pgalloc_dma32 && strcmp(name, "pgalloc_dma32") == 0)) pgalloc_dma32 = strtoull(value, NULL, 10); @@ -597,6 +599,54 @@ int do_proc_vmstat(int update_every, usec_t dt) { rrdset_done(st_pgfaults); } + // -------------------------------------------------------------------- + + // Ondemand criteria for NUMA. Since this won't change at run time, we + // check it only once. 
We check whether the node count is >= 2 because + // single-node systems have uninteresting statistics (since all accesses + // are local). + if(unlikely(has_numa == -1)) { + has_numa = (get_numa_node_count() >= 2 && + (numa_local || numa_foreign || numa_interleave || numa_other || numa_pte_updates || + numa_huge_pte_updates || numa_hint_faults || numa_hint_faults_local || numa_pages_migrated)) ? 1 : 0; + } + + if(do_numa == CONFIG_ONDEMAND_YES || (do_numa == CONFIG_ONDEMAND_ONDEMAND && has_numa)) { + static RRDSET *st_numa = NULL; + if(unlikely(!st_numa)) { + st_numa = rrdset_create("mem", "numa", NULL, "numa", NULL, "NUMA events", "events/s", 800, update_every, RRDSET_TYPE_LINE); + st_numa->isdetail = 1; + + // These depend on CONFIG_NUMA in the kernel. + rrddim_add(st_numa, "local", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st_numa, "foreign", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st_numa, "interleave", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st_numa, "other", NULL, 1, 1, RRDDIM_INCREMENTAL); + + // The following stats depend on CONFIG_NUMA_BALANCING in the + // kernel. 
+            rrddim_add(st_numa, "pte updates", NULL, 1, 1, RRDDIM_INCREMENTAL);
+            rrddim_add(st_numa, "huge pte updates", NULL, 1, 1, RRDDIM_INCREMENTAL);
+            rrddim_add(st_numa, "hint faults", NULL, 1, 1, RRDDIM_INCREMENTAL);
+            rrddim_add(st_numa, "hint faults local", NULL, 1, 1, RRDDIM_INCREMENTAL);
+            rrddim_add(st_numa, "pages migrated", NULL, 1, 1, RRDDIM_INCREMENTAL);
+        }
+        else rrdset_next(st_numa);
+
+        rrddim_set(st_numa, "local", numa_local);
+        rrddim_set(st_numa, "foreign", numa_foreign);
+        rrddim_set(st_numa, "interleave", numa_interleave);
+        rrddim_set(st_numa, "other", numa_other);
+
+        rrddim_set(st_numa, "pte updates", numa_pte_updates);
+        rrddim_set(st_numa, "huge pte updates", numa_huge_pte_updates);
+        rrddim_set(st_numa, "hint faults", numa_hint_faults);
+        rrddim_set(st_numa, "hint faults local", numa_hint_faults_local);
+        rrddim_set(st_numa, "pages migrated", numa_pages_migrated);
+
+        rrdset_done(st_numa);
+    }
+
     return 0;
 }

diff --git a/src/sys_devices_system_node.c b/src/sys_devices_system_node.c
new file mode 100644
index 00000000..18c3fcd3
--- /dev/null
+++ b/src/sys_devices_system_node.c
@@ -0,0 +1,150 @@
+#include "common.h"
+
+// One list entry per NUMA node directory that has a numastat file.
+struct node {
+    char *name;              // directory name; passed to rrdset_create() and used in log messages
+    char *numastat_filename; // path of this node's numastat file
+    procfile *numastat_ff;   // opened on first use by the collector
+    RRDSET *numastat_st;     // created on first use by the collector
+    struct node *next;
+};
+static struct node *numa_root = NULL;
+
+// Scan the NUMA sysfs directory and prepend to numa_root one entry for every
+// sub-directory named "node" + digit whose numastat file can be stat()ed.
+//
+// Returns the number of matching sub-directories (counted before the numastat
+// check, so it may exceed the list length), or 0 if the directory cannot be
+// opened.
+static int find_all_nodes(void) {
+    int numa_node_count = 0;
+    char name[FILENAME_MAX + 1];
+    snprintfz(name, FILENAME_MAX, "%s%s", global_host_prefix, "/sys/devices/system/node");
+    char *dirname = config_get("plugin:proc:/sys/devices/system/node", "directory to monitor", name);
+
+    DIR *dir = opendir(dirname);
+    if(!dir) {
+        error("Cannot read NUMA node directory '%s'", dirname);
+        return 0;
+    }
+
+    struct dirent *de = NULL;
+    while((de = readdir(dir))) {
+        if(de->d_type != DT_DIR)
+            continue;
+
+        if(strncmp(de->d_name, "node", 4) != 0)
+            continue;
+
+        // <ctype.h> functions need a value representable as unsigned char.
+        if(!isdigit((unsigned char)de->d_name[4]))
+            continue;
+
+        numa_node_count++;
+
+        struct node *m = callocz(1, sizeof(struct node));
+        m->name = strdupz(de->d_name);
+
+        struct stat st;
+
+        snprintfz(name, FILENAME_MAX, "%s/%s/numastat", dirname, de->d_name);
+        if(stat(name, &st) == -1) {
+            freez(m->name);
+            freez(m);
+            continue;
+        }
+
+        m->numastat_filename = strdupz(name);
+
+        m->next = numa_root;
+        numa_root = m;
+    }
+
+    closedir(dir);
+
+    return numa_node_count;
+}
+
+// Per-node NUMA collector: reads each node's numastat file and feeds the
+// local_node, numa_foreign, interleave_hit and other_node counters into one
+// chart per node.
+//
+// Charts are updated when "enable per-node numa metrics" resolves to
+// CONFIG_ONDEMAND_YES, or to CONFIG_ONDEMAND_ONDEMAND with at least two node
+// directories found.
+// Returns 1 when no node with a numastat file exists, 0 otherwise.
+int do_proc_sys_devices_system_node(int update_every, usec_t dt) {
+    (void)dt;
+
+    static int numa_node_count = 0;
+
+    if(unlikely(numa_root == NULL)) {
+        numa_node_count = find_all_nodes();
+        if(unlikely(numa_root == NULL))
+            return 1;
+    }
+
+    static int do_numastat = -1;
+    struct node *m;
+
+    if(unlikely(do_numastat == -1)) {
+        do_numastat = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/node", "enable per-node numa metrics", CONFIG_ONDEMAND_ONDEMAND);
+    }
+
+    if(do_numastat == CONFIG_ONDEMAND_YES || (do_numastat == CONFIG_ONDEMAND_ONDEMAND && numa_node_count >= 2)) {
+        for(m = numa_root; m; m = m->next) {
+            if(m->numastat_filename) {
+                if(unlikely(!m->numastat_ff)) {
+                    m->numastat_ff = procfile_open(m->numastat_filename, " ", PROCFILE_FLAG_DEFAULT);
+                    if(unlikely(!m->numastat_ff))
+                        continue;
+                }
+
+                m->numastat_ff = procfile_readall(m->numastat_ff);
+                if(unlikely(!m->numastat_ff || procfile_lines(m->numastat_ff) < 1 || procfile_linewords(m->numastat_ff, 0) < 1))
+                    continue;
+
+                procfile *ff = m->numastat_ff;
+
+                RRDSET *st = m->numastat_st;
+                if(unlikely(!st)) {
+                    st = rrdset_create("mem", m->name, NULL, "numa", NULL, "NUMA events", "events/s", 1000, update_every, RRDSET_TYPE_LINE);
+                    st->isdetail = 1;
+
+                    rrddim_add(st, "local_node", "local", 1, 1, RRDDIM_INCREMENTAL);
+                    rrddim_add(st, "numa_foreign", "foreign", 1, 1, RRDDIM_INCREMENTAL);
+                    rrddim_add(st, "interleave_hit", "interleave", 1, 1, RRDDIM_INCREMENTAL);
+                    rrddim_add(st, "other_node", "other", 1, 1, RRDDIM_INCREMENTAL);
+
+                    m->numastat_st = st;
+                }
+                else rrdset_next(st);
+
+                uint32_t lines = procfile_lines(ff), l;
+                for(l = 0; l < lines; l++) {
+                    uint32_t words = procfile_linewords(ff, l);
+                    if(unlikely(words < 2)) {
+                        if(unlikely(words)) error("Cannot read %s numastat line %u. Expected 2 params, read %u.", m->name, l, words);
+                        continue;
+                    }
+
+                    char *name = procfile_lineword(ff, l, 0);
+                    char *value = procfile_lineword(ff, l, 1);
+                    if (unlikely(!name || !*name || !value || !*value)) continue;
+
+                    // numastat also carries the hit/miss totals, which are
+                    // deliberately not charted; only set the dimensions that
+                    // were added to this chart above.
+                    if(strcmp(name, "local_node") != 0 && strcmp(name, "numa_foreign") != 0 &&
+                       strcmp(name, "interleave_hit") != 0 && strcmp(name, "other_node") != 0)
+                        continue;
+
+                    rrddim_set(st, name, strtoull(value, NULL, 10));
+                }
+                rrdset_done(st);
+            }
+        }
+    }
+
+    return 0;
+}
diff --git a/web/dashboard_info.js b/web/dashboard_info.js
index bb5f2d02..24a579cf 100644
--- a/web/dashboard_info.js
+++ b/web/dashboard_info.js
@@ -262,6 +262,10 @@ netdataDashboard.submenu = {
         info: 'Kernel Same-page Merging (KSM) performance monitoring, read from several files in /sys/kernel/mm/ksm/. KSM is a memory-saving de-duplication feature in the Linux kernel (since version 2.6.32). The KSM daemon ksmd periodically scans those areas of user memory which have been registered with it, looking for pages of identical content which can be replaced by a single write-protected page (which is automatically copied if a process later wants to update its content). KSM was originally developed for use with KVM (where it was known as Kernel Shared Memory), to fit more virtual machines into physical memory, by sharing the data common between them. But it can be useful to any application which generates many instances of the same data.'
     },

+    'mem.numa': {
+        info: 'Non-Uniform Memory Access (NUMA) is a hierarchical memory design where the memory access time is dependent on locality. Under NUMA, a processor can access its own local memory faster than non-local memory (memory local to another processor or memory shared between processors). The individual metrics are described in the Linux kernel documentation.'
+    },
+
     'ipv4.ecn': {
         info: 'Explicit Congestion Notification (ECN) is a TCP extension that allows end-to-end notification of network congestion without dropping packets.
ECN is an optional feature that may be used between two ECN-enabled endpoints when the underlying network infrastructure also supports it.' },