9 #include <sys/statvfs.h>
14 #include "appconfig.h"
17 #include "plugin_proc.h"
19 #include "proc_self_mountinfo.h"
21 #define RRD_TYPE_DISK "disk"
26 int partition_id; // -1 = this is not a partition
// get_disk(): find the struct disk entry for the block device major:minor in
// the disk_root list, or allocate a new entry, append it to the list and fill
// in its partition id and mount point.
// NOTE(review): this is a sampled view of the function - some statements
// (local declarations, returns, closing braces, a few conditions) are not
// visible here. The comments describe only the lines that are shown.
32 struct disk *get_disk(unsigned long major, unsigned long minor) {
// printf-style template for the per-device "partition" file; empty until the
// first call fills it in (see the check on its first byte below)
33 static char path_find_block_device_partition[FILENAME_MAX + 1] = "";
// mountinfo list kept across calls; starts as NULL and is replaced whenever
// mountinfo_read() is called further down
34 static struct mountinfo *mountinfo_root = NULL;
37 // search for it in our RAM list.
38 // this is sequential, but since we just walk through
39 // and the number of disks / partitions in a system
40 // should not be that many, it should be acceptable
41 for(d = disk_root; d ; d = d->next)
42 if(unlikely(d->major == major && d->minor == minor))
45 // if we found it, return it
// one-time initialisation of the path template: the built-in value is
// global_host_prefix + "/sys/dev/block/%lu:%lu/partition"; it is passed to
// config_get() as the third argument (presumably the default - confirm) and
// whatever config_get() returns is what gets cached
49 if(unlikely(!path_find_block_device_partition[0])) {
50 char filename[FILENAME_MAX + 1];
51 snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/sys/dev/block/%lu:%lu/partition");
52 snprintfz(path_find_block_device_partition, FILENAME_MAX, "%s", config_get("plugin:proc:/proc/diskstats", "path to get block device partition", filename));
56 // create a new disk structure
57 d = (struct disk *)malloc(sizeof(struct disk));
// allocation failure is reported through fatal()
58 if(!d) fatal("Cannot allocate memory for struct disk in proc_diskstats.");
65 // append it to the list
// walk to the last node; this dereferences disk_root, so it relies on the
// list being non-empty here (the guard is not visible in this view - confirm)
70 for(last = disk_root; last->next ;last = last->next);
74 // find if it is a partition
75 // by reading /sys/dev/block/MAJOR:MINOR/partition
76 char buffer[FILENAME_MAX + 1];
// expand the cached template with this device's major and minor numbers
77 snprintfz(buffer, FILENAME_MAX, path_find_block_device_partition, major, minor);
79 int fd = open(buffer, O_RDONLY, 0666);
80 if(likely(fd != -1)) {
// the file content overwrites the path in the same buffer
82 int bytes = read(fd, buffer, FILENAME_MAX);
// NOTE(review): strtoul() needs a NUL-terminated string; the statement that
// terminates buffer after read() is not visible in this view - confirm
86 d->partition_id = strtoul(buffer, NULL, 10);
88 // if the /partition file does not exist, it is a disk, not a partition
90 // ------------------------------------------------------------------------
91 // check if we can find its mount point
93 // mountinfo_find() can be called with NULL mountinfo_root
94 struct mountinfo *mi = mountinfo_find(mountinfo_root, d->major, d->minor);
// the condition that triggers the refresh below is not visible in this view
// (presumably the lookup above failing - confirm)
96 // mountinfo_free() can be called with NULL mountinfo_root
97 mountinfo_free(mountinfo_root);
99 // re-read mountinfo in case something changed
100 mountinfo_root = mountinfo_read();
102 // search again for this disk
103 mi = mountinfo_find(mountinfo_root, d->major, d->minor);
// the disk entry keeps its own copy of the mount point string
107 d->mount_point = strdup(mi->mount_point);
108 // no need to check for NULL
// no mount point recorded for this device
110 d->mount_point = NULL;
// do_proc_diskstats(): read the diskstats procfile and update the per-disk
// charts. update_every is handed to rrdset_create() for new charts and dt to
// rrdset_next_usec() for existing ones. Returns 0 when the procfile cannot be
// read, so that the open is retried on the next call.
// NOTE(review): this is a sampled view - some statements are not visible.
115 int do_proc_diskstats(int update_every, unsigned long long dt) {
// state kept across calls; -1 / NULL / "" mean "not initialised yet"
116 static procfile *ff = NULL;
117 static char path_to_get_hw_sector_size[FILENAME_MAX + 1] = "";
118 static int enable_new_disks = -1;
119 static int do_io = -1, do_ops = -1, do_mops = -1, do_iotime = -1, do_qops = -1, do_util = -1, do_backlog = -1, do_space = -1;
// load each global option once from the "plugin:proc:/proc/diskstats" config
// section, passing CONFIG_ONDEMAND_ONDEMAND as the last argument
121 if(enable_new_disks == -1) enable_new_disks = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "enable new disks detected at runtime", CONFIG_ONDEMAND_ONDEMAND);
123 if(do_io == -1) do_io = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "bandwidth for all disks", CONFIG_ONDEMAND_ONDEMAND);
124 if(do_ops == -1) do_ops = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "operations for all disks", CONFIG_ONDEMAND_ONDEMAND);
125 if(do_mops == -1) do_mops = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "merged operations for all disks", CONFIG_ONDEMAND_ONDEMAND);
126 if(do_iotime == -1) do_iotime = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "i/o time for all disks", CONFIG_ONDEMAND_ONDEMAND);
127 if(do_qops == -1) do_qops = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "queued operations for all disks", CONFIG_ONDEMAND_ONDEMAND);
128 if(do_util == -1) do_util = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "utilization percentage for all disks", CONFIG_ONDEMAND_ONDEMAND);
129 if(do_backlog == -1)do_backlog = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "backlog for all disks", CONFIG_ONDEMAND_ONDEMAND);
130 if(do_space == -1) do_space = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "space usage for all disks", CONFIG_ONDEMAND_ONDEMAND);
// open the stats file: global_host_prefix + "/proc/diskstats" unless the
// "filename to monitor" option returns something else; words are split on
// space and tab
133 char filename[FILENAME_MAX + 1];
134 snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/proc/diskstats");
135 ff = procfile_open(config_get("plugin:proc:/proc/diskstats", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT);
// one-time setup of the printf-style template (one %s for the device name)
// used to locate a device's hw_sector_size file
139 if(!path_to_get_hw_sector_size[0]) {
140 char filename[FILENAME_MAX + 1];
141 snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/sys/block/%s/queue/hw_sector_size");
142 snprintfz(path_to_get_hw_sector_size, FILENAME_MAX, "%s", config_get("plugin:proc:/proc/diskstats", "path to get h/w sector size", filename));
// (re)read the whole file; ff is replaced by the returned handle
145 ff = procfile_readall(ff);
146 if(!ff) return 0; // we return 0, so that we will retry to open it next time
// heap buffer that receives statvfs() results for the disk space chart
// NOTE(review): allocated on every call; the matching free() and the action
// taken after the error() below are not visible in this view - confirm
148 struct statvfs * buff_statvfs;
149 if ( !(buff_statvfs = (struct statvfs *)
150 malloc(sizeof(struct statvfs)))) {
151 error("Failed to allocate memory to buffer.");
// number of lines the procfile currently holds; l is the line index
154 uint32_t lines = procfile_lines(ff), l;
// one iteration per line of the stats file (one device per line)
157 for(l = 0; l < lines ;l++) {
// values parsed from the current line, plus the disk space figures that are
// filled in later from statvfs()
159 unsigned long long major = 0, minor = 0,
160 reads = 0, mreads = 0, readsectors = 0, readms = 0,
161 writes = 0, mwrites = 0, writesectors = 0, writems = 0,
162 queued_ios = 0, busy_ms = 0, backlog_ms = 0,
163 space_avail = 0, space_avail_root = 0, space_used = 0;
// values returned by rrddim_set() for the same counters; they feed the
// derived charts at the end of the loop body
165 unsigned long long last_reads = 0, last_readsectors = 0, last_readms = 0,
166 last_writes = 0, last_writesectors = 0, last_writems = 0,
// lines with fewer than 14 words are skipped (word indexes 0..13 are read)
169 words = procfile_linewords(ff, l);
170 if(words < 14) continue;
// words 0, 1, 2: device major number, minor number and name
172 major = strtoull(procfile_lineword(ff, l, 0), NULL, 10);
173 minor = strtoull(procfile_lineword(ff, l, 1), NULL, 10);
174 disk = procfile_lineword(ff, l, 2);
176 // # of reads completed # of writes completed
177 // This is the total number of reads or writes completed successfully.
178 reads = strtoull(procfile_lineword(ff, l, 3), NULL, 10); // rd_ios
179 writes = strtoull(procfile_lineword(ff, l, 7), NULL, 10); // wr_ios
181 // # of reads merged # of writes merged
182 // Reads and writes which are adjacent to each other may be merged for
183 // efficiency. Thus two 4K reads may become one 8K read before it is
184 // ultimately handed to the disk, and so it will be counted (and queued)
185 mreads = strtoull(procfile_lineword(ff, l, 4), NULL, 10); // rd_merges_or_rd_sec
186 mwrites = strtoull(procfile_lineword(ff, l, 8), NULL, 10); // wr_merges
188 // # of sectors read # of sectors written
189 // This is the total number of sectors read or written successfully.
190 readsectors = strtoull(procfile_lineword(ff, l, 5), NULL, 10); // rd_sec_or_wr_ios
191 writesectors = strtoull(procfile_lineword(ff, l, 9), NULL, 10); // wr_sec
193 // # of milliseconds spent reading # of milliseconds spent writing
194 // This is the total number of milliseconds spent by all reads or writes (as
195 // measured from __make_request() to end_that_request_last()).
196 readms = strtoull(procfile_lineword(ff, l, 6), NULL, 10); // rd_ticks_or_wr_sec
197 writems = strtoull(procfile_lineword(ff, l, 10), NULL, 10); // wr_ticks
199 // # of I/Os currently in progress
200 // The only field that should go to zero. Incremented as requests are
201 // given to appropriate struct request_queue and decremented as they finish.
202 queued_ios = strtoull(procfile_lineword(ff, l, 11), NULL, 10); // ios_pgr
204 // # of milliseconds spent doing I/Os
205 // This field increases so long as field queued_ios is nonzero.
206 busy_ms = strtoull(procfile_lineword(ff, l, 12), NULL, 10); // tot_ticks
208 // weighted # of milliseconds spent doing I/Os
209 // This field is incremented at each I/O start, I/O completion, I/O
210 // merge, or read of these stats by the number of I/Os in progress
211 // (field queued_ios) times the number of milliseconds spent doing I/O since the
212 // last update of this field. This can provide an easy measure of both
213 // I/O completion time and the backlog that may be accumulating.
214 backlog_ms = strtoull(procfile_lineword(ff, l, 13), NULL, 10); // rq_ticks
218 // remove slashes from disk names
// done in place on the word returned by procfile_lineword(): '/' becomes '_'
220 for(s = disk; *s ;s++) if(*s == '/') *s = '_';
// fetch (or create) the cached entry for this major:minor pair
222 struct disk *d = get_disk(major, minor);
// partition_id == -1 marks a device that is not a partition; such devices
// take their default from the "enable new disks detected at runtime" setting
223 if(d->partition_id == -1)
224 def_enabled = enable_new_disks;
// chart family: the mount point when one is known, else the device name
228 char *mount_point = d->mount_point;
229 char *family = d->mount_point;
230 if(!family) family = disk;
// NOTE(review): the lines below are fragments of a switch on the device major
// number; the switch header, most case labels and the break statements are
// not visible in this view. The visible pattern: a minor number that is not a
// multiple of 8 / 16 / 32 / 64 is treated as a partition and disabled by
// default, anything else follows enable_new_disks.
235 case 43: // network block
243 def_enabled = enable_new_disks;
265 if(minor % 8) def_enabled = 0; // partitions
266 else def_enabled = enable_new_disks;
269 case 8: // scsi disks
270 case 65: // scsi disks
271 case 66: // scsi disks
272 case 67: // scsi disks
273 case 68: // scsi disks
274 case 69: // scsi disks
275 case 70: // scsi disks
276 case 71: // scsi disks
277 case 72: // scsi disks
278 case 73: // scsi disks
279 case 74: // scsi disks
280 case 75: // scsi disks
281 case 76: // scsi disks
282 case 77: // scsi disks
283 case 78: // scsi disks
284 case 79: // scsi disks
293 case 101: // hyperdisk
294 case 102: // compressed
303 case 114: // bios raid
304 case 116: // ram board
318 case 259: // nvme0n1 issue #119
319 if(minor % 16) def_enabled = 0; // partitions
320 else def_enabled = enable_new_disks;
325 if(minor % 32) def_enabled = 0; // partitions
326 else def_enabled = enable_new_disks;
340 if(minor % 64) def_enabled = 0; // partitions
341 else def_enabled = enable_new_disks;
// per-disk copies of the global chart switches; each one may be overridden
// below by this disk's own config section
354 int ddo_io = do_io, ddo_ops = do_ops, ddo_mops = do_mops, ddo_iotime = do_iotime, ddo_qops = do_qops, ddo_util = do_util, ddo_backlog = do_backlog, ddo_space = do_space;
356 // check which charts are enabled for this disk
// the config section name is "plugin:proc:/proc/diskstats:" + the disk name
358 char var_name[4096 + 1];
359 snprintfz(var_name, 4096, "plugin:proc:/proc/diskstats:%s", disk);
// skip the disk when it is disabled, or when it is "on demand" and has not
// completed any read or write so far
360 def_enabled = config_get_boolean_ondemand(var_name, "enabled", def_enabled);
361 if(def_enabled == CONFIG_ONDEMAND_NO) continue;
362 if(def_enabled == CONFIG_ONDEMAND_ONDEMAND && !reads && !writes) continue;
// per-chart settings for this disk; the current value of each switch is
// passed as the last argument
365 ddo_io = config_get_boolean_ondemand(var_name, "bandwidth", ddo_io);
366 ddo_ops = config_get_boolean_ondemand(var_name, "operations", ddo_ops);
367 ddo_mops = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops);
368 ddo_iotime = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime);
369 ddo_qops = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops);
370 ddo_util = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util);
371 ddo_backlog = config_get_boolean_ondemand(var_name, "backlog", ddo_backlog);
372 ddo_space = config_get_boolean_ondemand(var_name, "space", ddo_space);
374 // by default, do not add charts that do not have values
375 if(ddo_io == CONFIG_ONDEMAND_ONDEMAND && !reads && !writes) ddo_io = 0;
376 if(ddo_mops == CONFIG_ONDEMAND_ONDEMAND && mreads == 0 && mwrites == 0) ddo_mops = 0;
377 if(ddo_iotime == CONFIG_ONDEMAND_ONDEMAND && readms == 0 && writems == 0) ddo_iotime = 0;
378 if(ddo_util == CONFIG_ONDEMAND_ONDEMAND && busy_ms == 0) ddo_util = 0;
379 if(ddo_backlog == CONFIG_ONDEMAND_ONDEMAND && backlog_ms == 0) ddo_backlog = 0;
// NOTE(review): the queued-operations switch is gated on backlog_ms, not on
// queued_ios - presumably because queued_ios is an instantaneous value that is
// often 0; confirm this is intended
380 if(ddo_qops == CONFIG_ONDEMAND_ONDEMAND && backlog_ms == 0) ddo_qops = 0;
382 // for absolute values, we need to switch the setting to 'yes'
383 // to allow it refresh from now on
384 if(ddo_qops == CONFIG_ONDEMAND_ONDEMAND) config_set(var_name, "queued operations", "yes");
// NOTE(review): in the chart sections below the "if(ddo_...)" guards, the
// "if(!st)" checks and the rrdset_done() calls are not visible in this view.
// Each section follows the same visible shape: look the chart up by type and
// disk name, create it with its dimensions, otherwise advance it with
// rrdset_next_usec(), then store the current values with rrddim_set().
389 // --------------------------------------------------------------------
// disk.io: bandwidth chart. The hardware sector size is needed to convert
// sector counts; 512 is the fallback when it cannot be read.
391 int sector_size = 512;
393 st = rrdset_find_bytype(RRD_TYPE_DISK, disk);
395 char tf[FILENAME_MAX + 1], *t;
396 char ssfilename[FILENAME_MAX + 1];
// work on a copy of the disk name so the name used for charts stays unchanged
398 strncpyz(tf, disk, FILENAME_MAX);
400 // replace all / with !
401 while((t = strchr(tf, '/'))) *t = '!';
// expand the cached path template with the adjusted name and read the first
// line of that file
403 snprintfz(ssfilename, FILENAME_MAX, path_to_get_hw_sector_size, tf);
404 FILE *fpss = fopen(ssfilename, "r");
407 char *tmp = fgets(ssbuffer, 1024, fpss);
// a non-positive value is logged as invalid (the message says 512 is assumed)
410 sector_size = atoi(tmp);
411 if(sector_size <= 0) {
412 error("Invalid sector size %d for device %s in %s. Assuming 512.", sector_size, disk, ssfilename);
416 else error("Cannot read data for sector size for device %s from %s. Assuming 512.", disk, ssfilename);
420 else error("Cannot read sector size for device %s from %s. Assuming 512.", disk, ssfilename);
422 st = rrdset_create(RRD_TYPE_DISK, disk, NULL, family, "disk.io", "Disk I/O Bandwidth", "kilobytes/s", 2000, update_every, RRDSET_TYPE_AREA);
// sector counters with sector_size and 1024 as the two numeric arguments
// (presumably multiplier and divisor, giving kilobytes - confirm against
// rrddim_add()); the writes dimension uses the negated sector size
424 rrddim_add(st, "reads", NULL, sector_size, 1024, RRDDIM_INCREMENTAL);
425 rrddim_add(st, "writes", NULL, sector_size * -1, 1024, RRDDIM_INCREMENTAL);
427 else rrdset_next_usec(st, dt);
// keep what rrddim_set() returns for the average-size chart computed later
429 last_readsectors = rrddim_set(st, "reads", readsectors);
430 last_writesectors = rrddim_set(st, "writes", writesectors);
434 // --------------------------------------------------------------------
// disk.ops: completed read and write operations
437 st = rrdset_find_bytype("disk_ops", disk);
439 st = rrdset_create("disk_ops", disk, NULL, family, "disk.ops", "Disk Completed I/O Operations", "operations/s", 2001, update_every, RRDSET_TYPE_LINE);
442 rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_INCREMENTAL);
443 rrddim_add(st, "writes", NULL, -1, 1, RRDDIM_INCREMENTAL);
445 else rrdset_next_usec(st, dt);
// kept for the derived charts computed later
447 last_reads = rrddim_set(st, "reads", reads);
448 last_writes = rrddim_set(st, "writes", writes);
452 // --------------------------------------------------------------------
// disk.qops: I/O operations currently in progress (absolute value)
455 st = rrdset_find_bytype("disk_qops", disk);
457 st = rrdset_create("disk_qops", disk, NULL, family, "disk.qops", "Disk Current I/O Operations", "operations", 2002, update_every, RRDSET_TYPE_LINE);
460 rrddim_add(st, "operations", NULL, 1, 1, RRDDIM_ABSOLUTE);
462 else rrdset_next_usec(st, dt);
464 rrddim_set(st, "operations", queued_ios);
468 // --------------------------------------------------------------------
// disk.backlog: weighted I/O time counter, added with 1 and 10 as the numeric
// arguments
471 st = rrdset_find_bytype("disk_backlog", disk);
473 st = rrdset_create("disk_backlog", disk, NULL, family, "disk.backlog", "Disk Backlog", "backlog (ms)", 2003, update_every, RRDSET_TYPE_AREA);
476 rrddim_add(st, "backlog", NULL, 1, 10, RRDDIM_INCREMENTAL);
478 else rrdset_next_usec(st, dt);
480 rrddim_set(st, "backlog", backlog_ms);
484 // --------------------------------------------------------------------
// disk.util: busy-time counter, added with 1 and 10 as the numeric arguments
// (presumably ms per second / 10 = percent - confirm)
487 st = rrdset_find_bytype("disk_util", disk);
489 st = rrdset_create("disk_util", disk, NULL, family, "disk.util", "Disk Utilization Time", "% of time working", 2004, update_every, RRDSET_TYPE_AREA);
492 rrddim_add(st, "utilization", NULL, 1, 10, RRDDIM_INCREMENTAL);
494 else rrdset_next_usec(st, dt);
// kept for the service-time chart computed later
496 last_busy_ms = rrddim_set(st, "utilization", busy_ms);
500 // --------------------------------------------------------------------
// disk.mops: merged read and write operations
503 st = rrdset_find_bytype("disk_mops", disk);
505 st = rrdset_create("disk_mops", disk, NULL, family, "disk.mops", "Disk Merged Operations", "merged operations/s", 2021, update_every, RRDSET_TYPE_LINE);
508 rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_INCREMENTAL);
509 rrddim_add(st, "writes", NULL, -1, 1, RRDDIM_INCREMENTAL);
511 else rrdset_next_usec(st, dt);
513 rrddim_set(st, "reads", mreads);
514 rrddim_set(st, "writes", mwrites);
518 // --------------------------------------------------------------------
// disk.iotime: milliseconds spent reading and writing
521 st = rrdset_find_bytype("disk_iotime", disk);
523 st = rrdset_create("disk_iotime", disk, NULL, family, "disk.iotime", "Disk Total I/O Time", "milliseconds/s", 2022, update_every, RRDSET_TYPE_LINE);
526 rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_INCREMENTAL);
527 rrddim_add(st, "writes", NULL, -1, 1, RRDDIM_INCREMENTAL);
529 else rrdset_next_usec(st, dt);
// kept for the await chart computed later
531 last_readms = rrddim_set(st, "reads", readms);
532 last_writems = rrddim_set(st, "writes", writems);
// disk.space: space usage of the filesystem found at this disk's chart family
// path, split into available, reserved-for-root and used dimensions.
// All three dimensions carry byte values and share the same divisor,
// 1048576 (1024 * 1024), so that the chart unit "Megabyte" holds for each.
// NOTE(review): the surrounding guards (ddo_space / mount_point checks, the
// "if(!st)" test and the else branches) are not visible in this view.
540 st = rrdset_find_bytype("disk_space", disk);
542 st = rrdset_create("disk_space", disk, NULL, family, "disk.space", "Disk Space Usage", "Megabyte", 2023, update_every, RRDSET_TYPE_AREA);
545 rrddim_add(st, "avail", NULL, 1, 1048576, RRDDIM_ABSOLUTE);
546 rrddim_add(st, "reserved for root", NULL, 1, 1048576, RRDDIM_ABSOLUTE);
547 rrddim_add(st, "used" , NULL, 1, 1048576, RRDDIM_ABSOLUTE);
549 else rrdset_next_usec(st, dt);
// query the filesystem; a failure is logged
553 if (statvfs(family, buff_statvfs) < 0) {
554 error("Failed checking disk space usage of %s", family);
// block counts are widened to unsigned long long before multiplying, so the
// byte totals cannot wrap where the statvfs fields are 32-bit wide
// NOTE(review): POSIX expresses f_blocks/f_bfree/f_bavail in units of
// f_frsize; f_bsize is kept here as in the original - confirm
556 space_avail = (unsigned long long)buff_statvfs->f_bavail * buff_statvfs->f_bsize;
557 space_avail_root = (unsigned long long)(buff_statvfs->f_bfree - buff_statvfs->f_bavail) * buff_statvfs->f_bsize;
558 space_used = (unsigned long long)(buff_statvfs->f_blocks - buff_statvfs->f_bfree) * buff_statvfs->f_bsize;
561 rrddim_set(st, "avail", space_avail);
562 rrddim_set(st, "reserved for root", space_avail_root);
563 rrddim_set(st, "used", space_used);
// the missing mount point is only reported when the space setting is not
// "on demand"
566 if(ddo_space != CONFIG_ONDEMAND_ONDEMAND) {
567 error("Cannot find space usage for disk %s. It does not have a mount point.", family);
572 // --------------------------------------------------------------------
573 // calculate differential charts
574 // only if this is not the first time we run
// Each value below is a ratio of two deltas (current counter minus the value
// rrddim_set() returned earlier for that counter); 0 is stored when the
// denominator delta is 0, which avoids a division by zero.
// NOTE(review): the "if(!st)" checks and rrdset_done() calls are not visible
// in this view.
// disk.await: delta of I/O milliseconds per delta of completed operations
577 if(ddo_iotime && ddo_ops) {
578 st = rrdset_find_bytype("disk_await", disk);
580 st = rrdset_create("disk_await", disk, NULL, family, "disk.await", "Average Completed I/O Operation Time", "ms per operation", 2005, update_every, RRDSET_TYPE_LINE);
583 rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_ABSOLUTE);
584 rrddim_add(st, "writes", NULL, -1, 1, RRDDIM_ABSOLUTE);
586 else rrdset_next_usec(st, dt);
588 rrddim_set(st, "reads", (reads - last_reads) ? (readms - last_readms) / (reads - last_reads) : 0);
589 rrddim_set(st, "writes", (writes - last_writes) ? (writems - last_writems) / (writes - last_writes) : 0);
// disk.avgsz: delta of sectors per delta of completed operations; the
// dimensions are added with sector_size and 1024 as the numeric arguments
593 if(ddo_io && ddo_ops) {
594 st = rrdset_find_bytype("disk_avgsz", disk);
596 st = rrdset_create("disk_avgsz", disk, NULL, family, "disk.avgsz", "Average Completed I/O Operation Bandwidth", "kilobytes per operation", 2006, update_every, RRDSET_TYPE_AREA);
599 rrddim_add(st, "reads", NULL, sector_size, 1024, RRDDIM_ABSOLUTE);
600 rrddim_add(st, "writes", NULL, -sector_size, 1024, RRDDIM_ABSOLUTE);
602 else rrdset_next_usec(st, dt);
604 rrddim_set(st, "reads", (reads - last_reads) ? (readsectors - last_readsectors) / (reads - last_reads) : 0);
605 rrddim_set(st, "writes", (writes - last_writes) ? (writesectors - last_writesectors) / (writes - last_writes) : 0);
// disk.svctm: delta of busy milliseconds per delta of completed operations
// (reads and writes together)
609 if(ddo_util && ddo_ops) {
610 st = rrdset_find_bytype("disk_svctm", disk);
612 st = rrdset_create("disk_svctm", disk, NULL, family, "disk.svctm", "Average Service Time", "ms per operation", 2007, update_every, RRDSET_TYPE_LINE);
615 rrddim_add(st, "svctm", NULL, 1, 1, RRDDIM_ABSOLUTE);
617 else rrdset_next_usec(st, dt);
619 rrddim_set(st, "svctm", ((reads - last_reads) + (writes - last_writes)) ? (busy_ms - last_busy_ms) / ((reads - last_reads) + (writes - last_writes)) : 0);