arthur.barton.de Git - netdata.git/blob - src/proc_diskstats.c
Merge pull request #1494 from ktsaou/master
[netdata.git] / src / proc_diskstats.c
1 #include "common.h"
2
3 #define RRD_TYPE_DISK "disk"
4
5 #define DISK_TYPE_PHYSICAL  1
6 #define DISK_TYPE_PARTITION 2
7 #define DISK_TYPE_CONTAINER 3
8
9 #ifndef NETDATA_RELOAD_MOUNTINFO_EVERY
10 #define NETDATA_RELOAD_MOUNTINFO_EVERY 10
11 #endif
12
// One entry per block device (disk or partition) handled by this plugin.
// Entries are linked through 'next'; the list starts at disk_root.
struct disk {
    char *disk;                 // the name of the device (sda, sdb, etc)
    unsigned long major;        // device major number
    unsigned long minor;        // device minor number
    int sector_size;            // sector size in bytes, used to scale the sector counters
    int type;                   // one of the DISK_TYPE_* constants

    char *mount_point;          // mount point of the device, NULL when none was found

    // cached configuration of this device
    int configured;             // non-zero once the do_* options below have been loaded
    int do_io;
    int do_ops;
    int do_mops;
    int do_iotime;
    int do_qops;
    int do_util;
    int do_backlog;

    struct disk *next;          // next device in the list, NULL for the last one
};

// head of the list of known devices
static struct disk *disk_root = NULL;
34
35 static struct mountinfo *disk_mountinfo_root = NULL;
36
37 static inline void mountinfo_reload(int force) {
38     static time_t last_loaded = 0;
39     time_t now = now_realtime_sec();
40
41     if(force || now - last_loaded >= NETDATA_RELOAD_MOUNTINFO_EVERY) {
42 //#ifdef NETDATA_INTERNAL_CHECKS
43 //        info("Reloading mountinfo");
44 //#endif
45
46         // mountinfo_free() can be called with NULL disk_mountinfo_root
47         mountinfo_free(disk_mountinfo_root);
48
49         // re-read mountinfo in case something changed
50         disk_mountinfo_root = mountinfo_read();
51
52         last_loaded = now;
53     }
54 }
55
56
// A mount point prefix for which space metrics are disabled by default.
// The prefixes are kept in a singly linked list.
struct excluded_mount_point {
    const char *prefix;                 // the path prefix to compare mount points with
    size_t len;                         // the length of prefix
    struct excluded_mount_point *next;  // next prefix, NULL for the last one
};

// head of the list of excluded prefixes
static struct excluded_mount_point *excluded_mount_points = NULL;
63
64 static inline int is_mount_point_excluded(const char *mount_point) {
65     static int initialized = 0;
66
67     if(unlikely(!initialized)) {
68         initialized = 1;
69
70         char *a = config_get("plugin:proc:/proc/diskstats", "exclude space metrics on paths", "/proc/ /sys/ /var/run/user/ /run/user/");
71         if(a && *a) {
72             char *s = a;
73
74             while(s && *s) {
75                 // skip all spaces
76                 while(isspace(*s)) s++;
77
78                 // empty string
79                 if(unlikely(!*s)) break;
80
81                 // find the next space
82                 char *c = s;
83                 while(*c && !isspace(*c)) c++;
84
85                 char *n;
86                 if(likely(*c)) n = c + 1;
87                 else n = NULL;
88
89                 // terminate our string
90                 *c = '\0';
91
92                 // allocate the structure
93                 struct excluded_mount_point *m = mallocz(sizeof(struct excluded_mount_point));
94                 m->prefix = strdup(s);
95                 m->len = strlen(m->prefix);
96                 m->next = excluded_mount_points;
97                 excluded_mount_points = m;
98
99                 // prepare for next loop
100                 s = n;
101                 if(likely(n)) *c = ' ';
102             }
103         }
104     }
105
106     size_t len = strlen(mount_point);
107     struct excluded_mount_point *m;
108     for(m = excluded_mount_points; m ; m = m->next) {
109         if(m->len <= len) {
110             // fprintf(stderr, "SPACE: comparing '%s' with '%s'\n", mount_point, m->prefix);
111             if(unlikely(strncmp(m->prefix, mount_point, m->len) == 0)) {
112                 // fprintf(stderr, "SPACE: excluded '%s'\n", mount_point);
113                 return 1;
114             }
115         }
116     }
117
118     // fprintf(stderr, "SPACE: included '%s'\n", mount_point);
119     return 0;
120 }
121
// The settings of a mount point, as cached by do_disk_space_stats().
// They are stored in its 'mount_points' DICTIONARY under the mount point path,
// so that on each iteration they are looked up there instead of the configuration.
struct mount_point_metadata {
    int do_space;       // CONFIG_ONDEMAND_* setting for the space usage chart
    int do_inodes;      // CONFIG_ONDEMAND_* setting for the inodes usage chart
};
128
129 static inline void do_disk_space_stats(struct mountinfo *mi, int update_every, usec_t dt) {
130     (void)dt;
131
132     const char *family = mi->mount_point;
133     const char *disk = mi->persistent_id;
134
135     static DICTIONARY *mount_points = NULL;
136     int do_space, do_inodes;
137
138     if(unlikely(!mount_points)) {
139         mount_points = dictionary_create(DICTIONARY_FLAG_SINGLE_THREADED);
140     }
141
142     struct mount_point_metadata *m = dictionary_get(mount_points, mi->mount_point);
143     if(unlikely(!m)) {
144         char var_name[4096 + 1];
145         snprintfz(var_name, 4096, "plugin:proc:/proc/diskstats:%s", mi->mount_point);
146
147         int def_space = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "space usage for all disks", CONFIG_ONDEMAND_ONDEMAND);
148         int def_inodes = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "inodes usage for all disks", CONFIG_ONDEMAND_ONDEMAND);
149
150         if(unlikely(is_mount_point_excluded(mi->mount_point))) {
151             def_space = CONFIG_ONDEMAND_NO;
152             def_inodes = CONFIG_ONDEMAND_NO;
153         }
154
155         do_space = config_get_boolean_ondemand(var_name, "space usage", def_space);
156         do_inodes = config_get_boolean_ondemand(var_name, "inodes usage", def_inodes);
157
158         struct mount_point_metadata mp = {
159             .do_space = do_space,
160             .do_inodes = do_inodes
161         };
162
163         dictionary_set(mount_points, mi->mount_point, &mp, sizeof(struct mount_point_metadata));
164     }
165     else {
166         do_space = m->do_space;
167         do_inodes = m->do_inodes;
168     }
169
170     if(unlikely(do_space == CONFIG_ONDEMAND_NO && do_inodes == CONFIG_ONDEMAND_NO))
171         return;
172
173     struct statvfs buff_statvfs;
174     if (statvfs(mi->mount_point, &buff_statvfs) < 0) {
175         error("Failed statvfs() for '%s' (disk '%s')", mi->mount_point, disk);
176         return;
177     }
178
179     // taken from get_fs_usage() found in coreutils
180     unsigned long bsize = (buff_statvfs.f_frsize) ? buff_statvfs.f_frsize : buff_statvfs.f_bsize;
181
182     fsblkcnt_t bavail         = buff_statvfs.f_bavail;
183     fsblkcnt_t btotal         = buff_statvfs.f_blocks;
184     fsblkcnt_t bavail_root    = buff_statvfs.f_bfree;
185     fsblkcnt_t breserved_root = bavail_root - bavail;
186     fsblkcnt_t bused;
187     if(likely(btotal >= bavail_root))
188         bused = btotal - bavail_root;
189     else
190         bused = bavail_root - btotal;
191
192 #ifdef NETDATA_INTERNAL_CHECKS
193     if(unlikely(btotal != bavail + breserved_root + bused))
194         error("Disk block statistics for '%s' (disk '%s') do not sum up: total = %llu, available = %llu, reserved = %llu, used = %llu", mi->mount_point, disk, (unsigned long long)btotal, (unsigned long long)bavail, (unsigned long long)breserved_root, (unsigned long long)bused);
195 #endif
196
197     // --------------------------------------------------------------------------
198
199     fsfilcnt_t favail         = buff_statvfs.f_favail;
200     fsfilcnt_t ftotal         = buff_statvfs.f_files;
201     fsfilcnt_t favail_root    = buff_statvfs.f_ffree;
202     fsfilcnt_t freserved_root = favail_root - favail;
203     fsfilcnt_t fused          = ftotal - favail_root;
204
205 #ifdef NETDATA_INTERNAL_CHECKS
206     if(unlikely(btotal != bavail + breserved_root + bused))
207         error("Disk inode statistics for '%s' (disk '%s') do not sum up: total = %llu, available = %llu, reserved = %llu, used = %llu", mi->mount_point, disk, (unsigned long long)ftotal, (unsigned long long)favail, (unsigned long long)freserved_root, (unsigned long long)fused);
208 #endif
209
210     // --------------------------------------------------------------------------
211
212     RRDSET *st;
213
214     if(do_space == CONFIG_ONDEMAND_YES || (do_space == CONFIG_ONDEMAND_ONDEMAND && (bavail || breserved_root || bused))) {
215         st = rrdset_find_bytype("disk_space", disk);
216         if(unlikely(!st)) {
217             char title[4096 + 1];
218             snprintfz(title, 4096, "Disk Space Usage for %s [%s]", family, mi->mount_source);
219             st = rrdset_create("disk_space", disk, NULL, family, "disk.space", title, "GB", 2023, update_every, RRDSET_TYPE_STACKED);
220
221             rrddim_add(st, "avail", NULL, bsize, 1024*1024*1024, RRDDIM_ABSOLUTE);
222             rrddim_add(st, "used" , NULL, bsize, 1024*1024*1024, RRDDIM_ABSOLUTE);
223             rrddim_add(st, "reserved_for_root", "reserved for root", bsize, 1024*1024*1024, RRDDIM_ABSOLUTE);
224         }
225         else rrdset_next(st);
226
227         rrddim_set(st, "avail", (collected_number)bavail);
228         rrddim_set(st, "used", (collected_number)bused);
229         rrddim_set(st, "reserved_for_root", (collected_number)breserved_root);
230         rrdset_done(st);
231     }
232
233     // --------------------------------------------------------------------------
234
235     if(do_inodes == CONFIG_ONDEMAND_YES || (do_inodes == CONFIG_ONDEMAND_ONDEMAND && (favail || freserved_root || fused))) {
236         st = rrdset_find_bytype("disk_inodes", disk);
237         if(unlikely(!st)) {
238             char title[4096 + 1];
239             snprintfz(title, 4096, "Disk Files (inodes) Usage for %s [%s]", family, mi->mount_source);
240             st = rrdset_create("disk_inodes", disk, NULL, family, "disk.inodes", title, "Inodes", 2024, update_every, RRDSET_TYPE_STACKED);
241
242             rrddim_add(st, "avail", NULL, 1, 1, RRDDIM_ABSOLUTE);
243             rrddim_add(st, "used" , NULL, 1, 1, RRDDIM_ABSOLUTE);
244             rrddim_add(st, "reserved_for_root", "reserved for root", 1, 1, RRDDIM_ABSOLUTE);
245         }
246         else rrdset_next(st);
247
248         rrddim_set(st, "avail", (collected_number)favail);
249         rrddim_set(st, "used", (collected_number)fused);
250         rrddim_set(st, "reserved_for_root", (collected_number)freserved_root);
251         rrdset_done(st);
252     }
253 }
254
255 static struct disk *get_disk(unsigned long major, unsigned long minor, char *disk) {
256     static char path_to_get_hw_sector_size[FILENAME_MAX + 1] = "";
257     static char path_to_get_hw_sector_size_partitions[FILENAME_MAX + 1] = "";
258     static char path_find_block_device[FILENAME_MAX + 1] = "";
259     struct disk *d;
260
261     // search for it in our RAM list.
262     // this is sequential, but since we just walk through
263     // and the number of disks / partitions in a system
264     // should not be that many, it should be acceptable
265     for(d = disk_root; d ; d = d->next)
266         if(unlikely(d->major == major && d->minor == minor))
267             break;
268
269     // if we found it, return it
270     if(likely(d))
271         return d;
272
273     // not found
274     // create a new disk structure
275     d = (struct disk *)mallocz(sizeof(struct disk));
276
277     d->disk = strdupz(disk);
278     d->major = major;
279     d->minor = minor;
280     d->type = DISK_TYPE_PHYSICAL; // Default type. Changed later if not correct.
281     d->configured = 0;
282     d->sector_size = 512; // the default, will be changed below
283     d->next = NULL;
284
285     // append it to the list
286     if(unlikely(!disk_root))
287         disk_root = d;
288     else {
289         struct disk *last;
290         for(last = disk_root; last->next ;last = last->next);
291         last->next = d;
292     }
293
294     // ------------------------------------------------------------------------
295     // find the type of the device
296
297     char buffer[FILENAME_MAX + 1];
298
299     // get the default path for finding info about the block device
300     if(unlikely(!path_find_block_device[0])) {
301         snprintfz(buffer, FILENAME_MAX, "%s%s", global_host_prefix, "/sys/dev/block/%lu:%lu/%s");
302         snprintfz(path_find_block_device, FILENAME_MAX, "%s", config_get("plugin:proc:/proc/diskstats", "path to get block device infos", buffer));
303     }
304
305     // find if it is a partition
306     // by checking if /sys/dev/block/MAJOR:MINOR/partition is readable.
307     snprintfz(buffer, FILENAME_MAX, path_find_block_device, major, minor, "partition");
308     if(likely(access(buffer, R_OK) == 0)) {
309         d->type = DISK_TYPE_PARTITION;
310     }
311     else {
312         // find if it is a container
313         // by checking if /sys/dev/block/MAJOR:MINOR/slaves has entries
314         snprintfz(buffer, FILENAME_MAX, path_find_block_device, major, minor, "slaves/");
315         DIR *dirp = opendir(buffer);
316         if(likely(dirp != NULL)) {
317             struct dirent *dp;
318             while( (dp = readdir(dirp)) ) {
319                 // . and .. are also files in empty folders.
320                 if(unlikely(strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)) {
321                     continue;
322                 }
323
324                 d->type = DISK_TYPE_CONTAINER;
325
326                 // Stop the loop after we found one file.
327                 break;
328             }
329             if(unlikely(closedir(dirp) == -1))
330                 error("Unable to close dir %s", buffer);
331         }
332     }
333
334     // ------------------------------------------------------------------------
335     // check if we can find its mount point
336
337     // mountinfo_find() can be called with NULL disk_mountinfo_root
338     struct mountinfo *mi = mountinfo_find(disk_mountinfo_root, d->major, d->minor);
339     if(unlikely(mi))
340         d->mount_point = strdupz(mi->mount_point);
341     else
342         d->mount_point = NULL;
343
344     // ------------------------------------------------------------------------
345     // find the disk sector size
346
347     if(unlikely(!path_to_get_hw_sector_size[0])) {
348         snprintfz(buffer, FILENAME_MAX, "%s%s", global_host_prefix, "/sys/block/%s/queue/hw_sector_size");
349         snprintfz(path_to_get_hw_sector_size, FILENAME_MAX, "%s", config_get("plugin:proc:/proc/diskstats", "path to get h/w sector size", buffer));
350     }
351     if(unlikely(!path_to_get_hw_sector_size_partitions[0])) {
352         snprintfz(buffer, FILENAME_MAX, "%s%s", global_host_prefix, "/sys/dev/block/%lu:%lu/subsystem/%s/../queue/hw_sector_size");
353         snprintfz(path_to_get_hw_sector_size_partitions, FILENAME_MAX, "%s", config_get("plugin:proc:/proc/diskstats", "path to get h/w sector size for partitions", buffer));
354     }
355
356     {
357         char tf[FILENAME_MAX + 1], *t;
358         strncpyz(tf, d->disk, FILENAME_MAX);
359
360         // replace all / with !
361         for(t = tf; *t ;t++)
362             if(unlikely(*t == '/')) *t = '!';
363
364         if(likely(d->type == DISK_TYPE_PARTITION))
365             snprintfz(buffer, FILENAME_MAX, path_to_get_hw_sector_size_partitions, d->major, d->minor, tf);
366         else
367             snprintfz(buffer, FILENAME_MAX, path_to_get_hw_sector_size, tf);
368
369         FILE *fpss = fopen(buffer, "r");
370         if(likely(fpss)) {
371             char buffer2[1024 + 1];
372             char *tmp = fgets(buffer2, 1024, fpss);
373
374             if(likely(tmp)) {
375                 d->sector_size = atoi(tmp);
376                 if(unlikely(d->sector_size <= 0)) {
377                     error("Invalid sector size %d for device %s in %s. Assuming 512.", d->sector_size, d->disk, buffer);
378                     d->sector_size = 512;
379                 }
380             }
381             else error("Cannot read data for sector size for device %s from %s. Assuming 512.", d->disk, buffer);
382
383             fclose(fpss);
384         }
385         else error("Cannot read sector size for device %s from %s. Assuming 512.", d->disk, buffer);
386     }
387
388     return d;
389 }
390
391 static inline int select_positive_option(int option1, int option2) {
392     if(unlikely(option1 == CONFIG_ONDEMAND_YES || option2 == CONFIG_ONDEMAND_YES))
393         return CONFIG_ONDEMAND_YES;
394     else if(unlikely(option1 == CONFIG_ONDEMAND_ONDEMAND || option2 == CONFIG_ONDEMAND_ONDEMAND))
395         return CONFIG_ONDEMAND_ONDEMAND;
396
397     return CONFIG_ONDEMAND_NO;
398 }
399
400 static inline int is_major_enabled(int major) {
401     static char *major_configs = NULL;
402     static size_t major_size = 0;
403
404     if(major < 0) return 1;
405
406     size_t wanted_size = (size_t)major + 1;
407
408     if(major_size < wanted_size) {
409         major_configs = reallocz(major_configs, wanted_size);
410
411         size_t i;
412         for(i = major_size; i < wanted_size ; i++)
413             major_configs[i] = -1;
414
415         major_size = wanted_size;
416     }
417
418     if(major_configs[major] == -1) {
419         char buffer[CONFIG_MAX_NAME + 1];
420         snprintfz(buffer, CONFIG_MAX_NAME, "performance metrics for disks with major %d", major);
421         major_configs[major] = (char)config_get_boolean("plugin:proc:/proc/diskstats", buffer, 1);
422     }
423
424     return major_configs[major];
425 }
426
427 int do_proc_diskstats(int update_every, usec_t dt) {
428     (void)dt;
429
430     static procfile *ff = NULL;
431     static int  global_enable_new_disks_detected_at_runtime = CONFIG_ONDEMAND_YES,
432                 global_enable_performance_for_physical_disks = CONFIG_ONDEMAND_ONDEMAND,
433                 global_enable_performance_for_virtual_disks = CONFIG_ONDEMAND_ONDEMAND,
434                 global_enable_performance_for_partitions = CONFIG_ONDEMAND_NO,
435                 global_do_io = CONFIG_ONDEMAND_ONDEMAND,
436                 global_do_ops = CONFIG_ONDEMAND_ONDEMAND,
437                 global_do_mops = CONFIG_ONDEMAND_ONDEMAND,
438                 global_do_iotime = CONFIG_ONDEMAND_ONDEMAND,
439                 global_do_qops = CONFIG_ONDEMAND_ONDEMAND,
440                 global_do_util = CONFIG_ONDEMAND_ONDEMAND,
441                 global_do_backlog = CONFIG_ONDEMAND_ONDEMAND,
442                 globals_initialized = 0;
443
444     if(unlikely(!globals_initialized)) {
445         global_enable_new_disks_detected_at_runtime = config_get_boolean("plugin:proc:/proc/diskstats", "enable new disks detected at runtime", global_enable_new_disks_detected_at_runtime);
446
447         global_enable_performance_for_physical_disks = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "performance metrics for physical disks", global_enable_performance_for_physical_disks);
448         global_enable_performance_for_virtual_disks = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "performance metrics for virtual disks", global_enable_performance_for_virtual_disks);
449         global_enable_performance_for_partitions = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "performance metrics for partitions", global_enable_performance_for_partitions);
450
451         global_do_io      = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "bandwidth for all disks", global_do_io);
452         global_do_ops     = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "operations for all disks", global_do_ops);
453         global_do_mops    = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "merged operations for all disks", global_do_mops);
454         global_do_iotime  = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "i/o time for all disks", global_do_iotime);
455         global_do_qops    = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "queued operations for all disks", global_do_qops);
456         global_do_util    = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "utilization percentage for all disks", global_do_util);
457         global_do_backlog = config_get_boolean_ondemand("plugin:proc:/proc/diskstats", "backlog for all disks", global_do_backlog);
458
459         globals_initialized = 1;
460     }
461
462     // --------------------------------------------------------------------------
463     // this is smart enough not to reload it every time
464
465     mountinfo_reload(0);
466
467     // --------------------------------------------------------------------------
468     // disk space metrics
469
470     struct mountinfo *mi;
471     for(mi = disk_mountinfo_root; mi ;mi = mi->next) {
472         if(unlikely(mi->flags & (MOUNTINFO_IS_DUMMY|MOUNTINFO_IS_BIND|MOUNTINFO_IS_SAME_DEV|MOUNTINFO_NO_STAT|MOUNTINFO_NO_SIZE|MOUNTINFO_READONLY)))
473             continue;
474
475         do_disk_space_stats(mi, update_every, dt);
476     }
477
478     // --------------------------------------------------------------------------
479
480     if(unlikely(!ff)) {
481         char filename[FILENAME_MAX + 1];
482         snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/proc/diskstats");
483         ff = procfile_open(config_get("plugin:proc:/proc/diskstats", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT);
484     }
485     if(unlikely(!ff)) return 0;
486
487     ff = procfile_readall(ff);
488     if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time
489
490     uint32_t lines = procfile_lines(ff), l;
491
492     for(l = 0; l < lines ;l++) {
493         // --------------------------------------------------------------------------
494         // Read parameters
495
496         char *disk;
497         unsigned long       major = 0, minor = 0;
498
499         collected_number    reads = 0,  mreads = 0,  readsectors = 0,  readms = 0,
500                             writes = 0, mwrites = 0, writesectors = 0, writems = 0,
501                             queued_ios = 0, busy_ms = 0, backlog_ms = 0;
502
503         collected_number    last_reads = 0,  last_readsectors = 0,  last_readms = 0,
504                             last_writes = 0, last_writesectors = 0, last_writems = 0,
505                             last_busy_ms = 0;
506
507         uint32_t words = procfile_linewords(ff, l);
508         if(unlikely(words < 14)) continue;
509
510         major           = strtoul(procfile_lineword(ff, l, 0), NULL, 10);
511         minor           = strtoul(procfile_lineword(ff, l, 1), NULL, 10);
512         disk            = procfile_lineword(ff, l, 2);
513
514         // # of reads completed # of writes completed
515         // This is the total number of reads or writes completed successfully.
516         reads           = strtoull(procfile_lineword(ff, l, 3), NULL, 10);  // rd_ios
517         writes          = strtoull(procfile_lineword(ff, l, 7), NULL, 10);  // wr_ios
518
519         // # of reads merged # of writes merged
520         // Reads and writes which are adjacent to each other may be merged for
521         // efficiency.  Thus two 4K reads may become one 8K read before it is
522         // ultimately handed to the disk, and so it will be counted (and queued)
523         mreads          = strtoull(procfile_lineword(ff, l, 4), NULL, 10);  // rd_merges_or_rd_sec
524         mwrites         = strtoull(procfile_lineword(ff, l, 8), NULL, 10);  // wr_merges
525
526         // # of sectors read # of sectors written
527         // This is the total number of sectors read or written successfully.
528         readsectors     = strtoull(procfile_lineword(ff, l, 5), NULL, 10);  // rd_sec_or_wr_ios
529         writesectors    = strtoull(procfile_lineword(ff, l, 9), NULL, 10);  // wr_sec
530
531         // # of milliseconds spent reading # of milliseconds spent writing
532         // This is the total number of milliseconds spent by all reads or writes (as
533         // measured from __make_request() to end_that_request_last()).
534         readms          = strtoull(procfile_lineword(ff, l, 6), NULL, 10);  // rd_ticks_or_wr_sec
535         writems         = strtoull(procfile_lineword(ff, l, 10), NULL, 10); // wr_ticks
536
537         // # of I/Os currently in progress
538         // The only field that should go to zero. Incremented as requests are
539         // given to appropriate struct request_queue and decremented as they finish.
540         queued_ios      = strtoull(procfile_lineword(ff, l, 11), NULL, 10); // ios_pgr
541
542         // # of milliseconds spent doing I/Os
543         // This field increases so long as field queued_ios is nonzero.
544         busy_ms         = strtoull(procfile_lineword(ff, l, 12), NULL, 10); // tot_ticks
545
546         // weighted # of milliseconds spent doing I/Os
547         // This field is incremented at each I/O start, I/O completion, I/O
548         // merge, or read of these stats by the number of I/Os in progress
549         // (field queued_ios) times the number of milliseconds spent doing I/O since the
550         // last update of this field.  This can provide an easy measure of both
551         // I/O completion time and the backlog that may be accumulating.
552         backlog_ms      = strtoull(procfile_lineword(ff, l, 13), NULL, 10); // rq_ticks
553
554
555         // --------------------------------------------------------------------------
556         // remove slashes from disk names
557         char *s;
558         for(s = disk; *s ;s++)
559             if(*s == '/') *s = '_';
560
561         // --------------------------------------------------------------------------
562         // get a disk structure for the disk
563
564         struct disk *d = get_disk(major, minor, disk);
565
566
567         // --------------------------------------------------------------------------
568         // Set its family based on mount point
569
570         char *family = d->mount_point;
571         if(!family) family = disk;
572
573
574         // --------------------------------------------------------------------------
575         // Check the configuration for the device
576
577         if(unlikely(!d->configured)) {
578             char var_name[4096 + 1];
579             snprintfz(var_name, 4096, "plugin:proc:/proc/diskstats:%s", disk);
580
581             int def_enable = config_get_boolean_ondemand(var_name, "enable", global_enable_new_disks_detected_at_runtime);
582             if(unlikely(def_enable == CONFIG_ONDEMAND_NO)) {
583                 // the user does not want any metrics for this disk
584                 d->do_io = CONFIG_ONDEMAND_NO;
585                 d->do_ops = CONFIG_ONDEMAND_NO;
586                 d->do_mops = CONFIG_ONDEMAND_NO;
587                 d->do_iotime = CONFIG_ONDEMAND_NO;
588                 d->do_qops = CONFIG_ONDEMAND_NO;
589                 d->do_util = CONFIG_ONDEMAND_NO;
590                 d->do_backlog = CONFIG_ONDEMAND_NO;
591             }
592             else {
593                 // this disk is enabled
594                 // check its direct settings
595
596                 int def_performance = CONFIG_ONDEMAND_ONDEMAND;
597
598                 // since this is 'on demand' we can figure the performance settings
599                 // based on the type of disk
600
601                 switch(d->type) {
602                     case DISK_TYPE_PHYSICAL:
603                         def_performance = global_enable_performance_for_physical_disks;
604                         break;
605
606                     case DISK_TYPE_PARTITION:
607                         def_performance = global_enable_performance_for_partitions;
608                         break;
609
610                     case DISK_TYPE_CONTAINER:
611                         def_performance = global_enable_performance_for_virtual_disks;
612                         break;
613                 }
614
615                 // check if we have to disable performance for this disk
616                 if(def_performance)
617                     def_performance = is_major_enabled((int)major);
618
619                 // ------------------------------------------------------------
620                 // now we have def_performance and def_space
621                 // to work further
622
623                 // def_performance
624                 // check the user configuration (this will also show our 'on demand' decision)
625                 def_performance = config_get_boolean_ondemand(var_name, "enable performance metrics", def_performance);
626
627                 int ddo_io = CONFIG_ONDEMAND_NO,
628                     ddo_ops = CONFIG_ONDEMAND_NO,
629                     ddo_mops = CONFIG_ONDEMAND_NO,
630                     ddo_iotime = CONFIG_ONDEMAND_NO,
631                     ddo_qops = CONFIG_ONDEMAND_NO,
632                     ddo_util = CONFIG_ONDEMAND_NO,
633                     ddo_backlog = CONFIG_ONDEMAND_NO;
634
635                 // we enable individual performance charts only when def_performance is not disabled
636                 if(unlikely(def_performance != CONFIG_ONDEMAND_NO)) {
637                     ddo_io = global_do_io,
638                     ddo_ops = global_do_ops,
639                     ddo_mops = global_do_mops,
640                     ddo_iotime = global_do_iotime,
641                     ddo_qops = global_do_qops,
642                     ddo_util = global_do_util,
643                     ddo_backlog = global_do_backlog;
644                 }
645
646                 d->do_io      = config_get_boolean_ondemand(var_name, "bandwidth", ddo_io);
647                 d->do_ops     = config_get_boolean_ondemand(var_name, "operations", ddo_ops);
648                 d->do_mops    = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops);
649                 d->do_iotime  = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime);
650                 d->do_qops    = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops);
651                 d->do_util    = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util);
652                 d->do_backlog = config_get_boolean_ondemand(var_name, "backlog", ddo_backlog);
653             }
654
655             d->configured = 1;
656         }
657
658         RRDSET *st;
659
660         // --------------------------------------------------------------------------
661         // Do performance metrics
662
663         if(d->do_io == CONFIG_ONDEMAND_YES || (d->do_io == CONFIG_ONDEMAND_ONDEMAND && (readsectors || writesectors))) {
664             d->do_io = CONFIG_ONDEMAND_YES;
665
666             st = rrdset_find_bytype(RRD_TYPE_DISK, disk);
667             if(unlikely(!st)) {
668                 st = rrdset_create(RRD_TYPE_DISK, disk, NULL, family, "disk.io", "Disk I/O Bandwidth", "kilobytes/s", 2000, update_every, RRDSET_TYPE_AREA);
669
670                 rrddim_add(st, "reads", NULL, d->sector_size, 1024, RRDDIM_INCREMENTAL);
671                 rrddim_add(st, "writes", NULL, d->sector_size * -1, 1024, RRDDIM_INCREMENTAL);
672             }
673             else rrdset_next(st);
674
675             last_readsectors  = rrddim_set(st, "reads", readsectors);
676             last_writesectors = rrddim_set(st, "writes", writesectors);
677             rrdset_done(st);
678         }
679
680         // --------------------------------------------------------------------
681
682         if(d->do_ops == CONFIG_ONDEMAND_YES || (d->do_ops == CONFIG_ONDEMAND_ONDEMAND && (reads || writes))) {
683             d->do_ops = CONFIG_ONDEMAND_YES;
684
685             st = rrdset_find_bytype("disk_ops", disk);
686             if(unlikely(!st)) {
687                 st = rrdset_create("disk_ops", disk, NULL, family, "disk.ops", "Disk Completed I/O Operations", "operations/s", 2001, update_every, RRDSET_TYPE_LINE);
688                 st->isdetail = 1;
689
690                 rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_INCREMENTAL);
691                 rrddim_add(st, "writes", NULL, -1, 1, RRDDIM_INCREMENTAL);
692             }
693             else rrdset_next(st);
694
695             last_reads  = rrddim_set(st, "reads", reads);
696             last_writes = rrddim_set(st, "writes", writes);
697             rrdset_done(st);
698         }
699
700         // --------------------------------------------------------------------
701
702         if(d->do_qops == CONFIG_ONDEMAND_YES || (d->do_qops == CONFIG_ONDEMAND_ONDEMAND && queued_ios)) {
703             d->do_qops = CONFIG_ONDEMAND_YES;
704
705             st = rrdset_find_bytype("disk_qops", disk);
706             if(unlikely(!st)) {
707                 st = rrdset_create("disk_qops", disk, NULL, family, "disk.qops", "Disk Current I/O Operations", "operations", 2002, update_every, RRDSET_TYPE_LINE);
708                 st->isdetail = 1;
709
710                 rrddim_add(st, "operations", NULL, 1, 1, RRDDIM_ABSOLUTE);
711             }
712             else rrdset_next(st);
713
714             rrddim_set(st, "operations", queued_ios);
715             rrdset_done(st);
716         }
717
718         // --------------------------------------------------------------------
719
720         if(d->do_backlog == CONFIG_ONDEMAND_YES || (d->do_backlog == CONFIG_ONDEMAND_ONDEMAND && backlog_ms)) {
721             d->do_backlog = CONFIG_ONDEMAND_YES;
722
723             st = rrdset_find_bytype("disk_backlog", disk);
724             if(unlikely(!st)) {
725                 st = rrdset_create("disk_backlog", disk, NULL, family, "disk.backlog", "Disk Backlog", "backlog (ms)", 2003, update_every, RRDSET_TYPE_AREA);
726                 st->isdetail = 1;
727
728                 rrddim_add(st, "backlog", NULL, 1, 10, RRDDIM_INCREMENTAL);
729             }
730             else rrdset_next(st);
731
732             rrddim_set(st, "backlog", backlog_ms);
733             rrdset_done(st);
734         }
735
736         // --------------------------------------------------------------------
737
738         if(d->do_util == CONFIG_ONDEMAND_YES || (d->do_util == CONFIG_ONDEMAND_ONDEMAND && busy_ms)) {
739             d->do_util = CONFIG_ONDEMAND_YES;
740
741             st = rrdset_find_bytype("disk_util", disk);
742             if(unlikely(!st)) {
743                 st = rrdset_create("disk_util", disk, NULL, family, "disk.util", "Disk Utilization Time", "% of time working", 2004, update_every, RRDSET_TYPE_AREA);
744                 st->isdetail = 1;
745
746                 rrddim_add(st, "utilization", NULL, 1, 10, RRDDIM_INCREMENTAL);
747             }
748             else rrdset_next(st);
749
750             last_busy_ms = rrddim_set(st, "utilization", busy_ms);
751             rrdset_done(st);
752         }
753
754         // --------------------------------------------------------------------
755
756         if(d->do_mops == CONFIG_ONDEMAND_YES || (d->do_mops == CONFIG_ONDEMAND_ONDEMAND && (mreads || mwrites))) {
757             d->do_mops = CONFIG_ONDEMAND_YES;
758
759             st = rrdset_find_bytype("disk_mops", disk);
760             if(unlikely(!st)) {
761                 st = rrdset_create("disk_mops", disk, NULL, family, "disk.mops", "Disk Merged Operations", "merged operations/s", 2021, update_every, RRDSET_TYPE_LINE);
762                 st->isdetail = 1;
763
764                 rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_INCREMENTAL);
765                 rrddim_add(st, "writes", NULL, -1, 1, RRDDIM_INCREMENTAL);
766             }
767             else rrdset_next(st);
768
769             rrddim_set(st, "reads", mreads);
770             rrddim_set(st, "writes", mwrites);
771             rrdset_done(st);
772         }
773
774         // --------------------------------------------------------------------
775
776         if(d->do_iotime == CONFIG_ONDEMAND_YES || (d->do_iotime == CONFIG_ONDEMAND_ONDEMAND && (readms || writems))) {
777             d->do_iotime = CONFIG_ONDEMAND_YES;
778
779             st = rrdset_find_bytype("disk_iotime", disk);
780             if(unlikely(!st)) {
781                 st = rrdset_create("disk_iotime", disk, NULL, family, "disk.iotime", "Disk Total I/O Time", "milliseconds/s", 2022, update_every, RRDSET_TYPE_LINE);
782                 st->isdetail = 1;
783
784                 rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_INCREMENTAL);
785                 rrddim_add(st, "writes", NULL, -1, 1, RRDDIM_INCREMENTAL);
786             }
787             else rrdset_next(st);
788
789             last_readms  = rrddim_set(st, "reads", readms);
790             last_writems = rrddim_set(st, "writes", writems);
791             rrdset_done(st);
792         }
793
794         // --------------------------------------------------------------------
795         // calculate differential charts
796         // only if this is not the first time we run
797
798         if(likely(dt)) {
799             if( (d->do_iotime == CONFIG_ONDEMAND_YES || (d->do_iotime == CONFIG_ONDEMAND_ONDEMAND && (readms || writems))) &&
800                 (d->do_ops    == CONFIG_ONDEMAND_YES || (d->do_ops    == CONFIG_ONDEMAND_ONDEMAND && (reads || writes)))) {
801                 st = rrdset_find_bytype("disk_await", disk);
802                 if(unlikely(!st)) {
803                     st = rrdset_create("disk_await", disk, NULL, family, "disk.await", "Average Completed I/O Operation Time", "ms per operation", 2005, update_every, RRDSET_TYPE_LINE);
804                     st->isdetail = 1;
805
806                     rrddim_add(st, "reads", NULL, 1, 1, RRDDIM_ABSOLUTE);
807                     rrddim_add(st, "writes", NULL, -1, 1, RRDDIM_ABSOLUTE);
808                 }
809                 else rrdset_next(st);
810
811                 rrddim_set(st, "reads", (reads - last_reads) ? (readms - last_readms) / (reads - last_reads) : 0);
812                 rrddim_set(st, "writes", (writes - last_writes) ? (writems - last_writems) / (writes - last_writes) : 0);
813                 rrdset_done(st);
814             }
815
816             if( (d->do_io  == CONFIG_ONDEMAND_YES || (d->do_io  == CONFIG_ONDEMAND_ONDEMAND && (readsectors || writesectors))) &&
817                 (d->do_ops == CONFIG_ONDEMAND_YES || (d->do_ops == CONFIG_ONDEMAND_ONDEMAND && (reads || writes)))) {
818                 st = rrdset_find_bytype("disk_avgsz", disk);
819                 if(unlikely(!st)) {
820                     st = rrdset_create("disk_avgsz", disk, NULL, family, "disk.avgsz", "Average Completed I/O Operation Bandwidth", "kilobytes per operation", 2006, update_every, RRDSET_TYPE_AREA);
821                     st->isdetail = 1;
822
823                     rrddim_add(st, "reads", NULL, d->sector_size, 1024, RRDDIM_ABSOLUTE);
824                     rrddim_add(st, "writes", NULL, d->sector_size * -1, 1024, RRDDIM_ABSOLUTE);
825                 }
826                 else rrdset_next(st);
827
828                 rrddim_set(st, "reads", (reads - last_reads) ? (readsectors - last_readsectors) / (reads - last_reads) : 0);
829                 rrddim_set(st, "writes", (writes - last_writes) ? (writesectors - last_writesectors) / (writes - last_writes) : 0);
830                 rrdset_done(st);
831             }
832
833             if( (d->do_util == CONFIG_ONDEMAND_YES || (d->do_util == CONFIG_ONDEMAND_ONDEMAND && busy_ms)) &&
834                 (d->do_ops  == CONFIG_ONDEMAND_YES || (d->do_ops  == CONFIG_ONDEMAND_ONDEMAND && (reads || writes)))) {
835                 st = rrdset_find_bytype("disk_svctm", disk);
836                 if(unlikely(!st)) {
837                     st = rrdset_create("disk_svctm", disk, NULL, family, "disk.svctm", "Average Service Time", "ms per operation", 2007, update_every, RRDSET_TYPE_LINE);
838                     st->isdetail = 1;
839
840                     rrddim_add(st, "svctm", NULL, 1, 1, RRDDIM_ABSOLUTE);
841                 }
842                 else rrdset_next(st);
843
844                 rrddim_set(st, "svctm", ((reads - last_reads) + (writes - last_writes)) ? (busy_ms - last_busy_ms) / ((reads - last_reads) + (writes - last_writes)) : 0);
845                 rrdset_done(st);
846             }
847         }
848     }
849
850     return 0;
851 }