[Orca-checkins] r390 - in trunk/orca: data_gatherers/orcallator lib/SE/3.3.1
dmberezin at hotmail.com
dmberezin at hotmail.com
Tue Sep 14 09:31:45 PDT 2004
Author: dmberezin at hotmail.com
Date: Tue Sep 14 09:26:29 2004
New Revision: 390
Removed:
trunk/orca/lib/SE/3.3.1/tapeinfo.se
Modified:
trunk/orca/data_gatherers/orcallator/orcallator.se
Log:
* data_gatherers/orcallator/orcallator.se
* lib/SE/3.3.1/tapeinfo.se
Revert previous commits, which had files with Windows end-of-line endings,
back to revision 385.
Modified: trunk/orca/data_gatherers/orcallator/orcallator.se
==============================================================================
--- trunk/orca/data_gatherers/orcallator/orcallator.se (original)
+++ trunk/orca/data_gatherers/orcallator/orcallator.se Tue Sep 14 09:26:29 2004
@@ -23,8 +23,8 @@
// The maximum number of columns of data.
#define MAX_COLUMNS 2048
-// Enable kstat io measuring code.
-#define USE_KSTAT_IO 1
+// Enable the raw disk measuring code.
+#define USE_RAWDISK 1
// If WATCH_OS is defined, then measure every part of the operating
// system.
@@ -297,9 +297,8 @@
#endif
-#ifdef USE_KSTAT_IO
+#ifdef USE_RAWDISK
#include <sys_kstat.se>
-#include <tapeinfo.se>
// This code was developed so that the performance of virtual disks
// originating from a Sun A1000 raid controller could be monitored.
// These disks do not show up in the GLOBAL_disk[] io structure of SE.
@@ -307,11 +306,10 @@
// This extension accesses the sys_kstat.se interface to the kstat IO
// queues to extract info on drives not available in the kstat.se
// kstat$disk interface. Global data shared between function calls.
-struct io_dev_info_t {
+struct RawDisk {
// Exposed interface that matches kstat.
- string dev_class;
- string long_name;
- string short_name;
+ char long_name[48];
+ char short_name[8];
double reads;
double kreads;
double writes;
@@ -339,128 +337,293 @@
};
// Define global for tracking raw disk data.
-io_dev_info_t ORCA_io_dev_info[];
-int ORCA_io_dev_count=0;
-int ORCA_max_io_dev_count=0;
-
-orca_io_info_update() {
- int iodev;
- int index;
- ulong ul;
- kstat_ctl_t kc[1];
- kstat_t nkp[1];
- kstat_io_t kio;
-
- double read_writes;
- double big_etime;
- double elapsed_etime;
- double hz_etime;
+#ifndef MAX_RAWDISKS
+#define MAX_RAWDISKS 1024
+#endif
+
+RawDisk RAW_disk[MAX_RAWDISKS]; // Table of tracked devices; raw_disk_update() appends new entries.
+int RAW_disk_map=0; // Set to 1 by raw_disk_update() when a device is added; reset to 0 by raw_disk_map().
+int RAW_disk_count=0; // Number of RAW_disk[] entries currently in use.
+double RAW_disk_lastupdate; // gettimeofday() time, in seconds, of the last raw_disk_update() call.
+
+// Compare two short disk names and return 0 if they refer to the same
+// physical device name, ignoring slice info; nonzero otherwise.
+int raw_disk_short_name_cmp(char disk1[],
+ int disk1_length,
+ char disk2[],
+ int disk2_length)
+{
+ int i;
+
+ // Handle dad disks first since they do not have commas.
+ if (strncmp("dad", disk1, 3) == 0) {
+ return strncmp(disk1, disk2, disk1_length); // Only the first disk1_length characters are compared; disk2_length is not consulted.
+ }
+
+ // Extract the physical disk name from disk slices. This only works
+ // with SCSI disks where slices have comma separators.
+ for (i=0; i<disk1_length; ++i) {
+ if (disk1[i] == ',') {
+ disk1_length = i; // Truncate disk1 at its first comma.
+ break;
+ }
+ }
+ for (i=0; i<disk2_length; ++i) {
+ if (disk2[i] == ',') {
+ disk2_length = i; // Truncate disk2 at its first comma.
+ break;
+ }
+ }
+ if (disk1_length != disk2_length) {
+ return 1; // Device-name parts differ in length, so they cannot be equal.
+ }
+ return strncmp(disk1, disk2, disk1_length);
+}
+
+// Map short device names in RAW_disk[] to long names by scanning GLOBAL_disk_info[].
+raw_disk_map() {
+ int first_name_length;
+ char first_name[16];
+ int second_name_length;
+ char second_name[16];
+ char long_name[16]; // NOTE(review): 16 bytes here, but RawDisk.long_name is char[48]; confirm long names always fit.
+ char short_name[16];
+ int short_name_length;
+ int i;
+ int j;
+
+ // This section is used to map short names to long names. Since
+ // raw_disk_update has already identified how many physical devices
+ // exist, it simply tries to find these devices in GLOBAL_disk_info[].
+ //
+ // SE appears to have a bug where GLOBAL_diskinfo_size can be larger
+ // than the number of entries in GLOBAL_disk_info[] under a variety
+ // of conditions. In later versions of SE GLOBAL_diskinfo_size has
+ // been removed. This appears to fix the above problem. This code
+ // uses MAX_RAWDISKS for the table length and the assumption that
+ // short disk names come before short disk partition names to
+ // detect the end of the table. If it fails to detect the end it
+ // will core dump when it addresses unallocated memory.
+ //
+ // These symbols are used to recognize when we slip past the end of
+ // the raw devices in GLOBAL_disk_info. It would be nice to just
+ // look for a slice like sd0,a but unfortunately EIDE disks do not
+ // have slices.
+ //
+ // Check for the first and second disk in case the CD-ROM shows up
+ // as the first disk since it will not show slice information unless
+ // it is mounted.
+ strcpy(first_name, GLOBAL_disk_info[0].short_name);
+ first_name_length = strlen(first_name);
+ if (MAX_RAWDISKS > 1) {
+ strcpy(second_name, GLOBAL_disk_info[1].short_name);
+ second_name_length = strlen(second_name);
+ }
+ for (i=0; i<RAW_disk_count; ++i) {
+ // Do not map st & fd devices.
+ if (strncmp(RAW_disk[i].short_name, "st", 2) != 0 &&
+ strncmp(RAW_disk[i].short_name, "fd", 2) != 0) {
+ for (j=0; j<MAX_RAWDISKS; ++j) {
+ strcpy(short_name, GLOBAL_disk_info[j].short_name);
+ if (j > 0) { // Past entry 0: seeing the first disk's device name again means the plain disk names have ended.
+ short_name_length = strlen(short_name);
+ if (raw_disk_short_name_cmp(first_name,
+ first_name_length,
+ short_name,
+ short_name_length) == 0) {
+ break;
+ }
+ if (j > 1) { // Same end-of-table test against the second disk's name.
+ if (raw_disk_short_name_cmp(second_name,
+ second_name_length,
+ short_name,
+ short_name_length) == 0) {
+ break;
+ }
+ }
+ }
+ if (strcmp(RAW_disk[i].short_name, short_name) == 0) { // Exact match: adopt this entry's long name.
+ strcpy(long_name, GLOBAL_disk_info[j].long_name);
+ strcpy(RAW_disk[i].long_name, long_name);
+ break;
+ }
+ }
+ }
+ }
+ RAW_disk_map = 0; // Clear the pending-map flag set by raw_disk_update().
+}
+
+raw_disk_update() { // Walk the kstat chain and refresh the per-device I/O statistics held in RAW_disk[].
+ int rdisk;
+ ulong ul;
+ kstat_ctl_t kc[1];
+ kstat_t kp[1];
+ kstat_t nkp[1];
+ kstat_io_t kio;
+ ulonglong _nread;
+ ulonglong _nwritten;
+ uint _reads;
+ uint _writes;
+ longlong _wtime;
+ longlong _wlentime;
+ longlong _wlastupdate;
+ longlong _rtime;
+ longlong _rlentime;
+ longlong _rlastupdate;
+ longlong _wcnt;
+ longlong _rcnt;
+
+ double read_writes;
+ double big_etime;
+ double elapsed_etime;
+ double hz_etime;
+ double nanosecond = NANOSEC;
+ double update;
+ double delta;
+ timeval_t time_update[1];
+ ulong time_void;
+ char short_name[8];
+
+ gettimeofday(time_update, time_void);
+ update = time_update[0].tv_sec + (time_update[0].tv_usec / 1000000.0);
+ delta = update - RAW_disk_lastupdate; // NOTE(review): delta is not read again anywhere in this function.
+ RAW_disk_lastupdate = update;
- // Initialize kstat control structure
kc[0] = kstat_open();
+ // Read them.
+ if (kstat_read(kc, kp, 0) == -1) { // NOTE(review): kp has not been filled before this call; confirm what sys_kstat.se expects here.
+ perror("raw_disk_update:kstat_read");
+ exit(1);
+ }
+
// Traverse the chain looking for IO events.
- for (ul=kc[0].kc_chain; ul!=0; ul=nkp[0].ks_next) {
+ for (ul=kc[0].kc_chain; ul !=0; ul=nkp[0].ks_next) {
struct_fill(nkp[0], ul);
if (nkp[0].ks_type == KSTAT_TYPE_IO) {
- // Look for disk or tape statistics
- if (nkp[0].ks_class == "disk" || nkp[0].ks_class == "tape") {
- // Get data from the kernel for this kstat
- if (kstat_read(kc, nkp, 0) == -1) {
- perror("orca_io_info_update:kstat_read error");
- exit(1);
- }
- struct_fill(kio, nkp[0].ks_data);
-
- // Try to locate device in our array
- for (iodev=0; iodev < ORCA_io_dev_count; ++iodev) {
- if (ORCA_io_dev_info[iodev].short_name == nkp[0].ks_name) {
+ strcpy(short_name, nkp[0].ks_name); // NOTE(review): short_name holds only 8 chars; confirm ks_name always fits.
+ if (short_name[0] != 'm' &&
+ short_name[0] != 'n' &&
+ strchr(short_name,',') == nil) { // Skip names starting with 'm' or 'n' and any name containing a comma.
+ // Try to locate device.
+ for (rdisk=0; rdisk<RAW_disk_count; ++rdisk) {
+ if (strcmp(RAW_disk[rdisk].short_name, short_name) == 0) {
break;
}
}
- // It must be new. Add it!
- if (iodev == ORCA_io_dev_count) {
- // Grow the device array if needed
- if (ORCA_io_dev_count == ORCA_max_io_dev_count) {
- ORCA_max_io_dev_count += 10;
- ORCA_io_dev_info = renew ORCA_io_dev_info[ORCA_max_io_dev_count];
- }
+ // It must be new. Add it!
+ if (rdisk == RAW_disk_count) { // NOTE(review): no check that RAW_disk_count < MAX_RAWDISKS before appending.
+ // Must be a tape drive or something else. Schedule device
+ // name map cycle.
+ RAW_disk_map = 1;
+ strcpy(RAW_disk[rdisk].long_name, short_name);
+ strcpy(RAW_disk[rdisk].short_name, short_name);
+ RAW_disk[rdisk]._reads = 0;
+ RAW_disk[rdisk]._nread = 0;
+ RAW_disk[rdisk]._rlentime = 0;
+ RAW_disk[rdisk]._rlastupdate = boot_time;
+ RAW_disk[rdisk]._rcnt = 0;
+ RAW_disk[rdisk]._writes = 0;
+ RAW_disk[rdisk]._nwritten = 0;
+ RAW_disk[rdisk]._wlentime = 0;
+ RAW_disk[rdisk]._wlastupdate = boot_time;
+ RAW_disk[rdisk]._wcnt = 0; // NOTE(review): _rtime and _wtime are not initialised for a new entry, yet are subtracted below.
+ RAW_disk_count++;
+ }
- if (nkp[0].ks_class == "tape") {
- index = find_tape_inst(nkp[0].ks_name);
+ // Update the device registers.
+ if (kstat_read(kc, nkp, 0) == -1) {
+ perror("raw_disk_update:kstat_read error");
+ exit(1);
+ } else {
+ // Read sys_kstat device IO queue to find out about recent
+ // activity. We validate data that is returned. Solaris
+ // 2.6 has occasional glitches when updating certain disks
+ // (c0t0d0) so we cover up the glitches by using data from
+ // the previous cycle. Eventually, we will get a good
+ // update. Fixing the data is not necessarily the best
+ // choice. Currently only kio.nread glitches. Correcting
+ // the error forces the IOs to get attributed to the next IO
+ // cycle.
+ struct_fill(kio, nkp[0].ks_data);
+ _nread = kio.nread;
+ if (RAW_disk[rdisk]._nread > _nread) { // Counter went backwards: keep the previous value.
+ _nread = RAW_disk[rdisk]._nread;
+ }
+ _reads = kio.reads;
+ if (RAW_disk[rdisk]._reads > _reads) {
+ _reads = RAW_disk[rdisk]._reads;
+ }
+ _rlentime = kio.rlentime;
+ _rtime = kio.rtime;
+ _rlastupdate = kio.wlastupdate; // NOTE(review): reads kio.wlastupdate, whereas the removed code stored kio.rlastupdate; likely a typo.
+ _rcnt = kio.rcnt;
+ _nwritten = kio.nwritten;
+ if (RAW_disk[rdisk]._nwritten > _nwritten) {
+ _nwritten = RAW_disk[rdisk]._nwritten;
+ }
+ _writes = kio.writes;
+ if (RAW_disk[rdisk]._writes > _writes) {
+ _writes = RAW_disk[rdisk]._nwritten; // NOTE(review): falls back to the saved _nwritten, not the saved _writes as the sibling checks do; likely a typo.
+ }
+ _wlentime = kio.wlentime;
+ _wtime = kio.wtime;
+ _wlastupdate = kio.wlastupdate;
+ _wcnt = kio.wcnt;
+
+ elapsed_etime = (_wlastupdate - RAW_disk[rdisk]._wlastupdate); // Interval is measured from the wlastupdate timestamps.
+ if (elapsed_etime > 0) {
+ hz_etime = elapsed_etime / nanosecond;
+ big_etime = 1024.0 * hz_etime;
} else {
- index = find_inst(nkp[0].ks_name);
+ elapsed_etime = nanosecond; // Timestamp did not advance: fall back to a one-second interval.
+ hz_etime = 1.0;
+ big_etime = 1024.0;
}
- if (index != -1) {
- if (nkp[0].ks_class == "tape") {
- ORCA_io_dev_info[iodev].long_name = GLOBAL_tape_info[index].long_name;
- } else {
- ORCA_io_dev_info[iodev].long_name = GLOBAL_disk_info[index].long_name;
- }
+ RAW_disk[rdisk].reads =(_reads-RAW_disk[rdisk]._reads) /hz_etime;
+ RAW_disk[rdisk].kreads =(_nread-RAW_disk[rdisk]._nread) /big_etime;
+ RAW_disk[rdisk].writes =(_writes-RAW_disk[rdisk]._writes)/hz_etime;
+ RAW_disk[rdisk].kwrites=(_nwritten-RAW_disk[rdisk]._nwritten) / big_etime;
+
+ read_writes = elapsed_etime * (RAW_disk[rdisk].reads + RAW_disk[rdisk].writes) / 1024.0;
+ if (read_writes > 0) {
+ RAW_disk[rdisk].avg_wait = (_wlentime - RAW_disk[rdisk]._wlentime) / read_writes;
+ RAW_disk[rdisk].avg_serv = (_rlentime - RAW_disk[rdisk]._rlentime) / read_writes;
+ RAW_disk[rdisk].service = RAW_disk[rdisk].avg_wait + RAW_disk[rdisk].avg_serv;
} else {
- ORCA_io_dev_info[iodev].long_name = nkp[0].ks_name;
+ RAW_disk[rdisk].avg_wait = 0.0;
+ RAW_disk[rdisk].avg_serv = 0.0;
+ RAW_disk[rdisk].service = 0.0;
}
- ORCA_io_dev_info[iodev].short_name = nkp[0].ks_name;
- ORCA_io_dev_info[iodev]._writes = kio.writes;
- ORCA_io_dev_info[iodev]._nwritten = kio.nwritten;
- ORCA_io_dev_info[iodev]._wlastupdate = kio.wlastupdate;
- ORCA_io_dev_info[iodev]._wlentime = kio.wlentime;
- ORCA_io_dev_info[iodev]._wtime = kio.wtime;
- ORCA_io_dev_info[iodev]._wcnt = kio.wcnt;
- ORCA_io_dev_info[iodev]._reads = kio.reads;
- ORCA_io_dev_info[iodev]._nread = kio.nread;
- ORCA_io_dev_info[iodev]._rlastupdate = kio.rlastupdate;
- ORCA_io_dev_info[iodev]._rlentime = kio.rlentime;
- ORCA_io_dev_info[iodev]._rtime = kio.rtime;
- ORCA_io_dev_info[iodev]._rcnt = kio.rcnt;
- ORCA_io_dev_count++;
+ // Update the counters.
+ RAW_disk[rdisk].run_percent = 100.0 * (_rtime - RAW_disk[rdisk]._rtime) / elapsed_etime;
+ RAW_disk[rdisk].wait_percent = 100.0 * (_wtime - RAW_disk[rdisk]._wtime) / elapsed_etime;
+ RAW_disk[rdisk]._writes = _writes;
+ RAW_disk[rdisk]._nwritten = _nwritten;
+ RAW_disk[rdisk]._wlastupdate = _wlastupdate;
+ RAW_disk[rdisk]._wlentime = _wlentime;
+ RAW_disk[rdisk]._wtime = _wtime;
+ RAW_disk[rdisk]._wcnt = _wcnt;
+ RAW_disk[rdisk]._reads = _reads;
+ RAW_disk[rdisk]._nread = _nread;
+ RAW_disk[rdisk]._rlastupdate = _rlastupdate;
+ RAW_disk[rdisk]._rlentime = _rlentime;
+ RAW_disk[rdisk]._rtime = _rtime;
+ RAW_disk[rdisk]._rcnt = _rcnt;
}
-
- elapsed_etime = (kio.wlastupdate - ORCA_io_dev_info[iodev]._wlastupdate);
- if (elapsed_etime == 0) {
- elapsed_etime = NANOSEC;
- }
- hz_etime = elapsed_etime / NANOSEC;
- big_etime = 1024.0 * hz_etime;
-
- ORCA_io_dev_info[iodev].reads =(kio.reads-ORCA_io_dev_info[iodev]._reads) /hz_etime;
- ORCA_io_dev_info[iodev].kreads =(kio.nread-ORCA_io_dev_info[iodev]._nread) /big_etime;
- ORCA_io_dev_info[iodev].writes =(kio.writes-ORCA_io_dev_info[iodev]._writes) /hz_etime;
- ORCA_io_dev_info[iodev].kwrites=(kio.nwritten-ORCA_io_dev_info[iodev]._nwritten)/big_etime;
-
- read_writes = elapsed_etime * (ORCA_io_dev_info[iodev].reads + ORCA_io_dev_info[iodev].writes) / 1024.0;
- if (read_writes > 0) {
- ORCA_io_dev_info[iodev].avg_wait = (kio.wlentime - ORCA_io_dev_info[iodev]._wlentime) / read_writes;
- ORCA_io_dev_info[iodev].avg_serv = (kio.rlentime - ORCA_io_dev_info[iodev]._rlentime) / read_writes;
- ORCA_io_dev_info[iodev].service = ORCA_io_dev_info[iodev].avg_wait + ORCA_io_dev_info[iodev].avg_serv;
- } else {
- ORCA_io_dev_info[iodev].avg_wait = 0.0;
- ORCA_io_dev_info[iodev].avg_serv = 0.0;
- ORCA_io_dev_info[iodev].service = 0.0;
- }
-
- // Update the counters.
- ORCA_io_dev_info[iodev].run_percent = 100.0 * (kio.rtime - ORCA_io_dev_info[iodev]._rtime) / elapsed_etime;
- ORCA_io_dev_info[iodev].wait_percent = 100.0 * (kio.wtime - ORCA_io_dev_info[iodev]._wtime) / elapsed_etime;
- ORCA_io_dev_info[iodev]._writes = kio.writes;
- ORCA_io_dev_info[iodev]._nwritten = kio.nwritten;
- ORCA_io_dev_info[iodev]._wlastupdate = kio.wlastupdate;
- ORCA_io_dev_info[iodev]._wlentime = kio.wlentime;
- ORCA_io_dev_info[iodev]._wtime = kio.wtime;
- ORCA_io_dev_info[iodev]._wcnt = kio.wcnt;
- ORCA_io_dev_info[iodev]._reads = kio.reads;
- ORCA_io_dev_info[iodev]._nread = kio.nread;
- ORCA_io_dev_info[iodev]._rlastupdate = kio.rlastupdate;
- ORCA_io_dev_info[iodev]._rlentime = kio.rlentime;
- ORCA_io_dev_info[iodev]._rtime = kio.rtime;
- ORCA_io_dev_info[iodev]._rcnt = kio.rcnt;
}
}
}
kstat_close(kc);
+
+ // Map long device names for any drives that we just discovered.
+ if (RAW_disk_map == 1) {
+ raw_disk_map();
+ }
}
-#endif // USE_KSTAT_IO
+#endif
+// RAWDISK
// Variables for handling output.
string compress = getenv("COMPRESSOR"); // How to compress logs.
@@ -862,8 +1025,8 @@
tmp_tcp = tcp$tcp;
#endif
-#ifdef USE_KSTAT_IO
- orca_io_info_update();
+#ifdef USE_RAWDISK
+ raw_disk_update();
#endif
}
@@ -1435,29 +1598,29 @@
total_tape_writek = 0.0;
tape_count = 0;
-#ifdef USE_KSTAT_IO
- for (i=0; i<ORCA_io_dev_count; ++i) {
+#ifdef USE_RAWDISK
+ for (i=0; i<RAW_disk_count; ++i) {
// Record tape drive st devices differently than regular disk devices.
- if (ORCA_io_dev_info[i].short_name =~ "^st.*") {
+ if (RAW_disk[i].short_name[0] == 's' && RAW_disk[i].short_name[1] == 't') {
tape_count++;
- total_tape_reads += ORCA_io_dev_info[i].reads;
- total_tape_writes += ORCA_io_dev_info[i].writes;
- total_tape_readk += ORCA_io_dev_info[i].kreads;
- total_tape_writek += ORCA_io_dev_info[i].kwrites;
- put_output(sprintf("tape_runp_%s", ORCA_io_dev_info[i].long_name),
- sprintf("%16.5f", ORCA_io_dev_info[i].run_percent));
+ total_tape_reads += RAW_disk[i].reads;
+ total_tape_writes += RAW_disk[i].writes;
+ total_tape_readk += RAW_disk[i].kreads;
+ total_tape_writek += RAW_disk[i].kwrites;
+ put_output(sprintf("tape_runp_%s", RAW_disk[i].long_name),
+ sprintf("%16.5f", RAW_disk[i].run_percent));
continue;
}
// Block the listing of floppy drives for now.
- if (ORCA_io_dev_info[i].short_name =~ "^fd.*") {
+ if (RAW_disk[i].short_name[0] == 'f' && RAW_disk[i].short_name[1] == 'd') {
continue;
}
disk_count++;
- put_output(sprintf("disk_runp_%s", ORCA_io_dev_info[i].long_name),
- sprintf("%16.5f", ORCA_io_dev_info[i].run_percent));
+ put_output(sprintf("disk_runp_%s", RAW_disk[i].long_name),
+ sprintf("%16.5f", RAW_disk[i].run_percent));
- put_output(sprintf("disk_svct_%s", ORCA_io_dev_info[i].long_name),
- sprintf("%16.5f", ORCA_io_dev_info[i].service));
+ put_output(sprintf("disk_svct_%s", RAW_disk[i].long_name),
+ sprintf("%16.5f", RAW_disk[i].service));
// Comments from Damon Atkins <Damon.Atkins at nabaus.com.au>. Check
// [wr]lentime to see if an EMC is using a fake disk for control.
@@ -1474,13 +1637,13 @@
#ifdef HAVE_EMC_DISK_CONTROL
if ((pioGLOB_old_wlentime[i] + pioGLOB_old_rlentime[i]) > 1) {
#endif
- total_disk_reads += ORCA_io_dev_info[i].reads;
- total_disk_writes += ORCA_io_dev_info[i].writes;
- total_disk_readk += ORCA_io_dev_info[i].kreads;
- total_disk_writek += ORCA_io_dev_info[i].kwrites;
- mean_disk_busy += ORCA_io_dev_info[i].run_percent;
- if (ORCA_io_dev_info[i].run_percent > peak_disk_busy) {
- peak_disk_busy = ORCA_io_dev_info[i].run_percent;
+ total_disk_reads += RAW_disk[i].reads;
+ total_disk_writes += RAW_disk[i].writes;
+ total_disk_readk += RAW_disk[i].kreads;
+ total_disk_writek += RAW_disk[i].kwrites;
+ mean_disk_busy += RAW_disk[i].run_percent;
+ if (RAW_disk[i].run_percent > peak_disk_busy) {
+ peak_disk_busy = RAW_disk[i].run_percent;
}
#ifdef HAVE_EMC_DISK_CONTROL
}
More information about the Orca-checkins
mailing list