[Orca-users] Re: disk run doesn't show all disks
Blair Zajac
blair at gps.caltech.edu
Tue Jun 19 12:43:15 PDT 2001
Mike,
Could you try the attached code to help track down the problem
you're seeing?
From the author of the new RAWDISK routines:
"I am enclosing the code that I used to debug the RAWDISK routines. It
emulates either iostat or sar -d (I am not sure which).
If you could get the person to run it we could isolate if the problem
is with my code."
Regards,
Blair
Mike Jackson wrote:
>
> Blair Zajac wrote:
> >
> > It's not obvious why this is happening. Try removing the line
> >
> > #define USE_RAWDISK 1
> >
> > from orcallator.se and see if this makes a difference.
>
> Ok, I did that and now it shows up in the percol-* files. Orca still
> won't create a RRD for it, and I don't know how to do that myself. So,
> it still doesn't show up in the graphs.
>
> Thanks,
> Mike
-------------- next part --------------
#include <stdio.se>
#include <stdlib.se>
#include <unistd.se>
#include <string.se>
#include <time.se>
#include <kstat.se>
#include <sys_kstat.se>
#include <utsname.se>
#include <p_iostat_class.se>
#include <p_netstat_class.se>
#include <p_vmstat_class.se>
#include <pure_rules.se>
#include <live_rules.se>
#include <mib.se>
#include <tcp_class.se>
#include <tcp_rules.se>
#include <mnt_class.se>
#include <statvfs.se>
#include <proc.se>
#include <fcntl.se>
#include <stat.se>
// Not referenced by the visible code in this file.
#define DEFAULT_MAXPID 30000
// Define the different parts of the system you want to examine.
// WATCH_DISK guards the definition of measure_disk(); WATCH_OS is not
// tested anywhere in the visible code.
#define WATCH_OS 1
#define WATCH_DISK 1
// The default sampling interval in seconds (initial value of interval).
#define SAMPLE_INTERVAL 10
// The maximum number of columns of data.
// Not referenced by the visible code in this file.
#define MAX_COLUMNS 512
// Seconds between samples; main() wakes on integer multiples of this value.
long interval = SAMPLE_INTERVAL;
// Set in main() from uname().
string nodename;
// Set in main() from argv[0].
string program_name;
// Set in initialize() from sysconf(_SC_PAGESIZE).
int page_size;
// Set in initialize(): current time minus (kstat$misc.clk_intr / hz).
long boot_time = 0;
// Set in initialize() from sysconf(_SC_CLK_TCK).
int hz;
// Each variable with a $ in its name below is paired with a plain tmp_
// variable; _measure_os() copies the $ variable into its tmp_ partner once
// per sample.  NOTE(review): the types are presumably declared by the
// included SE headers (live_rules.se, kstat.se) - confirm.
ks_system_pages kstat$pages;
ks_system_pages tmp_kstat_pages;
// Read directly in initialize() to compute boot_time; it has no tmp_ copy.
ks_system_misc kstat$misc;
lr_cpu_t lr_cpu$cpu;
lr_cpu_t tmp_lrcpu;
lr_mutex_t lr_mutex$m;
lr_mutex_t tmp_mutex;
lr_net_t lr_net$nr;
lr_net_t tmp_nr;
lr_tcp_t lr_tcp$tcp;
lr_tcp_t tmp_lrtcp;
lr_rpcclient_t lr_rpcclient$r;
lr_rpcclient_t tmp_lrpcc;
lr_disk_t lr_disk$dr;
lr_disk_t tmp_dr;
lr_dnlc_t lr_dnlc$dnlc;
lr_dnlc_t tmp_lrdnlc;
lr_inode_t lr_inode$inode;
lr_inode_t tmp_lrinode;
lr_ram_t lr_ram$ram;
lr_ram_t tmp_lrram;
// One entry per device tracked by raw_disk_update().
// The first group of fields holds the values reported by measure_disk();
// the fields whose names start with an underscore hold the raw kstat_io
// counters saved from the previous update so that deltas can be computed.
struct RawDisk {
char long_name[16]; //sysv5 disk name
char short_name[8]; // bsd device name
double reads; // read operations per second since the previous update
double kreads; // bytes read since the previous update / (1024 * elapsed seconds)
double writes; // write operations per second since the previous update
double kwrites; // bytes written since the previous update / (1024 * elapsed seconds)
double avg_wait; // wlentime delta divided by the read_writes term in raw_disk_update()
double avg_serv; // rlentime delta divided by the read_writes term in raw_disk_update()
double service; // avg_wait + avg_serv
double wait_percent; // 100 * wtime delta / elapsed time
double run_percent; // 100 * rtime delta / elapsed time
// hidden internal registers to track kstats fields
int _number; // kstat disk # (not referenced by the visible code)
ulonglong _nread; // number of bytes read
ulonglong _nwritten; //number of bytes written
uint _reads; // number of read operations
uint _writes; // number of write operations
longlong _wtime; // cumulative wait (pre-service) time
longlong _wlentime; // cumulative wait length*time product
longlong _wlastupdate; // last time wait queue changed
longlong _rtime; // cumulative run (service) time
longlong _rlentime; // cumulative run length*time product
longlong _rlastupdate; // last time run queue changed
uint _wcnt; // count of elements in wait state
uint _rcnt; // count of elements in run state
};
// Table of the devices found so far by raw_disk_update(); only the first
// RAW_disk_count entries are in use.
RawDisk RAW_disk[256];
int RAW_disk_map=0; // Flag: raw_disk_update() sets it to 1 to request a raw_disk_map() pass
int RAW_disk_count=0; // No. of devices Known
double RAW_disk_lastupdate; // Time of the last raw_disk_update() call, in seconds from gettimeofday
// Map short device names to long device names.
//
// Scans GLOBAL_disk_info and, for every entry whose long name starts with c
// and contains no s (slice) character, copies that long name onto the
// RAW_disk entry with the same short name.  RAW_disk entries without a match
// keep the long_name they already had.
//
// On return the RAW_disk_map request flag is cleared, so raw_disk_update()
// only calls this routine again after it has added a new device.
raw_disk_map() {
  int i;
  int j=0;
  char long_name[16];
  char short_name[16];
  //printf ("disk_map");
  // 2.6 appears to have a bug where MAX_DISK is +10 too large.
  // Actually this is a workaround for the A1000 rm6 raid manager.
  //
  // This does not affect finding physical devices.  We just miss a few
  // slices cXtXd0sX and pseudo disks md??
  //for (i=0; i < MAX_DISK - 22; i++) {
  // NOTE(review): the bound below is the number of RAW_disk entries, not the
  // number of GLOBAL_disk_info entries - confirm that this cannot index past
  // the end of GLOBAL_disk_info.
  for (i=0; i < RAW_disk_count; i++) {
    strcpy(long_name,GLOBAL_disk_info[i].long_name);
    if ( long_name[0] == 'c' && strchr(long_name, 's') == nil) {
      strcpy(short_name,GLOBAL_disk_info[i].short_name);
      //printf ("Map [%d] %s %s\n", i, short_name, long_name);
      for (j=0; j < RAW_disk_count;j++) {
        if ( strcmp(RAW_disk[j].short_name, short_name) == 0) {
          strcpy(RAW_disk[j].long_name, long_name);
          //printf ("Found [%d] %s %s %s\n",j, RAW_disk[j].short_name, short_name, long_name);
          break;
        }
      }
    }
  }
  // The requested mapping pass is done.  Clear the flag; leaving it at 1
  // would rerun this scan on every update even when no device was added.
  RAW_disk_map = 0;
}
// Refresh the per-device statistics in RAW_disk from the kstat chain.
//
// Walks the kstat chain and handles every entry of type KSTAT_TYPE_IO whose
// name does not start with m or n and does not contain a comma.  A device
// that is not yet in RAW_disk is appended and a name mapping pass is
// requested through RAW_disk_map.  For each device the rates and percentages
// are computed from the difference between the current kstat_io counters and
// the counters saved by the previous call, and then the saved counters are
// replaced by the current ones.
//
// Exits the program with status 1 if kstat_read() fails.
raw_disk_update() {
  int rdisk;
  ulong ul;
  kstat_ctl_t kc[1];
  kstat_t kp[1];
  kstat_t nkp[1];
  kstat_io_t kio;
  ulonglong _nread;
  ulonglong _nwritten;
  uint _reads;
  uint _writes;
  longlong _wtime;
  longlong _wlentime;
  longlong _wlastupdate;
  longlong _rtime;
  longlong _rlentime;
  longlong _rlastupdate;
  longlong _wcnt;
  longlong _rcnt;
  double read_writes;
  double big_etime;
  double elapsed_etime;
  double hz_etime;
  double nanosecond = NANOSEC;
  double update;
  double delta;
  timeval_t time_update[1];
  ulong time_void;
  char short_name[8];
  printf ("disk_update\n");
  // Record the wall clock time of this update.  delta is computed but the
  // rates below use the kstat timestamps instead.
  gettimeofday(time_update,time_void);
  update = time_update[0].tv_sec + (time_update[0].tv_usec / 1000000.0);
  delta = update - RAW_disk_lastupdate;
  RAW_disk_lastupdate = update;
  kc[0] = kstat_open();
  // read them
  if (kstat_read(kc,kp,0) == -1) {
    perror("raw_disk_update:kstat_read");
    exit(1);
  }
  //traverse the chain looking for IO events
  for (ul=kc[0].kc_chain; ul !=0; ul=nkp[0].ks_next) {
    struct_fill(nkp[0],ul);
    if ( nkp[0].ks_type == KSTAT_TYPE_IO) {
      // NOTE(review): short_name holds 8 chars; confirm that ks_name of the
      // devices of interest always fits.
      strcpy(short_name, nkp[0].ks_name);
      if ( short_name[0] != 'm' && short_name[0] != 'n' && strchr(short_name,',') == nil) {
        // Try to locate device
        for ( rdisk=0; rdisk < RAW_disk_count;rdisk++) {
          if (strcmp(RAW_disk[rdisk].short_name, short_name) == 0) {break; }
        }
        // It must be new. Add it!
        // NOTE(review): there is no check against the 256 entry size of
        // RAW_disk before appending.
        if ( rdisk == RAW_disk_count) {
          // must be a tape drive or something else
          RAW_disk_map = 1; // schedule device name map cycle
          strcpy(RAW_disk[rdisk].long_name, short_name);
          strcpy(RAW_disk[rdisk].short_name, short_name);
          RAW_disk[rdisk]._reads=0;
          RAW_disk[rdisk]._nread=0;
          RAW_disk[rdisk]._rlentime=0;
          RAW_disk[rdisk]._rlastupdate= boot_time;
          RAW_disk[rdisk]._rcnt = 0;
          RAW_disk[rdisk]._writes=0;
          RAW_disk[rdisk]._nwritten=0;
          RAW_disk[rdisk]._wlentime=0;
          RAW_disk[rdisk]._wlastupdate= boot_time;
          RAW_disk[rdisk]._wcnt = 0;
          RAW_disk_count++;
        }
        // update the device registers
        if (kstat_read(kc,nkp,0) == -1) {
          perror("raw_disk_update:kstat_read error");
          exit(1);
        } else {
          // Read sys_kstat device IO Queue to find out about recent activity
          // Check that valid data was returned for the device Solaris 2.6 has occasional glitches
          // when examining certain disks (c0t0d0) So we cover them up by using the data from
          // the previous cycle. Eventually, we will get an update. So far only kio.nread comes
          // back as zero. But we are skipping the whole update! This approach probably needs
          // More thought!
          struct_fill(kio,nkp[0].ks_data);
          // A counter that reads lower than the saved value is replaced by
          // the saved value, so the computed delta is never negative.
          _nread = kio.nread;
          if ( RAW_disk[rdisk]._nread > _nread ) {_nread = RAW_disk[rdisk]._nread;}
          _reads = kio.reads;
          if ( RAW_disk[rdisk]._reads > _reads ) {_reads = RAW_disk[rdisk]._reads;}
          _rlentime = kio.rlentime;
          _rtime = kio.rtime;
          // Run queue timestamp; the wait queue timestamp is read separately below.
          _rlastupdate = kio.rlastupdate;
          _rcnt = kio.rcnt;
          _nwritten = kio.nwritten;
          if(RAW_disk[rdisk]._nwritten > _nwritten) {_nwritten = RAW_disk[rdisk]._nwritten;}
          _writes = kio.writes;
          if (RAW_disk[rdisk]._writes > _writes) { _writes = RAW_disk[rdisk]._writes;}
          _wlentime = kio.wlentime;
          _wtime = kio.wtime;
          _wlastupdate = kio.wlastupdate;
          _wcnt = kio.wcnt;
          // Elapsed time is taken from the change in the wait queue timestamp.
          // hz_etime is that interval divided by NANOSEC and big_etime is
          // 1024 times hz_etime.  If the timestamp did not advance, fall back
          // to an interval of NANOSEC so the divisions below stay defined.
          elapsed_etime = (_wlastupdate - RAW_disk[rdisk]._wlastupdate);
          if ( elapsed_etime > 0) {
            hz_etime = elapsed_etime / nanosecond;
            big_etime = 1024.0 * hz_etime;
          } else {
            elapsed_etime = nanosecond;
            hz_etime = 1.0;
            big_etime = 1024.0;
          }
          RAW_disk[rdisk].reads=(_reads - RAW_disk[rdisk]._reads) /hz_etime;
          RAW_disk[rdisk].kreads=(_nread - RAW_disk[rdisk]._nread) / big_etime;
          RAW_disk[rdisk].writes=(_writes - RAW_disk[rdisk]._writes) /hz_etime;
          RAW_disk[rdisk].kwrites=(_nwritten - RAW_disk[rdisk]._nwritten) / big_etime;
          read_writes= elapsed_etime * (RAW_disk[rdisk].reads + RAW_disk[rdisk].writes) / 1024.0;
          // With no operations in the interval the averages are reported as zero.
          if (read_writes > 0) {
            RAW_disk[rdisk].avg_wait=(_wlentime - RAW_disk[rdisk]._wlentime) / read_writes;
            RAW_disk[rdisk].avg_serv=(_rlentime - RAW_disk[rdisk]._rlentime) / read_writes;
            RAW_disk[rdisk].service = RAW_disk[rdisk].avg_wait + RAW_disk[rdisk].avg_serv;
          } else {
            RAW_disk[rdisk].avg_wait=0.0;
            RAW_disk[rdisk].avg_serv=0.0;
            RAW_disk[rdisk].service =0.0;
          }
          // Update the Counters
          RAW_disk[rdisk].run_percent= 100.0 * (_rtime - RAW_disk[rdisk]._rtime) / elapsed_etime;
          RAW_disk[rdisk].wait_percent= 100.0 * (_wtime - RAW_disk[rdisk]._wtime) / elapsed_etime;
          RAW_disk[rdisk]._writes=_writes;
          RAW_disk[rdisk]._nwritten=_nwritten;
          RAW_disk[rdisk]._wlastupdate = _wlastupdate;
          RAW_disk[rdisk]._wlentime=_wlentime;
          RAW_disk[rdisk]._wtime=_wtime;
          RAW_disk[rdisk]._wcnt=_wcnt;
          RAW_disk[rdisk]._reads=_reads;
          RAW_disk[rdisk]._nread=_nread;
          RAW_disk[rdisk]._rlastupdate = _rlastupdate;
          RAW_disk[rdisk]._rlentime=_rlentime;
          RAW_disk[rdisk]._rtime=_rtime;
          RAW_disk[rdisk]._rcnt=_rcnt;
          // printf ("Update %d %s %s nread:%lld\n",rdisk, RAW_disk[rdisk].long_name, RAW_disk[rdisk].short_name, RAW_disk[rdisk]._nread);
        }
      }
    }
  }
  kstat_close(kc);
  // Look up Long device Names for any drives that we just discovered
  if ( RAW_disk_map == 1) {
    raw_disk_map();
  }
}
// Program entry point.
//
// Records the program name and the node name, runs initialize() once, and
// then loops forever: sleep until the next sample time, then call
// measure_os() with the current time.
int main(int argc, string argv[])
{
  utsname_t uts[1];
  long current_time;
  long wake_time;     // Absolute time of the next sample.
  tm_t current_tm;

  program_name = argv[0];

  // The node name comes from uname().
  uname(uts);
  nodename = uts[0].nodename;

  // Set up the global state and take the first measurement.
  initialize();

  for (;;) {
    // Round the current time down to a multiple of interval, then step
    // forward one interval at a time until the wake time is at least half
    // an interval in the future.
    current_time = time(0);
    wake_time = (current_time/interval)*interval;
    while (wake_time < current_time + interval*0.5) {
      wake_time = wake_time + interval;
    }

    sleep_till_and_count_new_proceses(wake_time);

    // Sample the system, stamped with the time after waking.
    current_time = time(0);
    current_tm = localtime(&current_time);
    measure_os(current_time, current_tm);
  }

  return 0;
}
// One-time start-up: waits for the disk data, fills in page_size, hz and
// boot_time, and takes the first measurement.
initialize()
{
#ifdef WATCH_CPU
  int i;
#endif

  // Wait DISK_UPDATE_RATE seconds so the disks have a chance to update.
  sleep(DISK_UPDATE_RATE);

  // System page size.
  page_size = sysconf(_SC_PAGESIZE);

  // Clock tick rate; needed for the boot time calculation below.
  hz = sysconf(_SC_CLK_TCK);

  // Boot time is the current time minus clk_intr divided by the tick rate.
  boot_time = time(0) - (kstat$misc.clk_intr / hz);

  // First measurement of the system.
  _measure_os();

  printf ("initialize complete\n");
}
// Measure the system statistics all at once.
// Copies each of the $ variables into its tmp_ partner and then refreshes
// the RAW_disk table through raw_disk_update().
// NOTE(review): reading a $ variable presumably triggers a fresh sample in
// SE, so the order and number of these reads may matter - confirm before
// reordering or removing any of them.
_measure_os()
{
tmp_lrcpu = lr_cpu$cpu;
tmp_mutex = lr_mutex$m;
tmp_nr = lr_net$nr;
tmp_lrtcp = lr_tcp$tcp;
tmp_lrpcc = lr_rpcclient$r;
tmp_dr = lr_disk$dr;
tmp_lrdnlc = lr_dnlc$dnlc;
tmp_lrinode = lr_inode$inode;
// NOTE(review): this repeats the previous statement; it looks accidental.
tmp_lrinode = lr_inode$inode;
tmp_lrram = lr_ram$ram;
tmp_kstat_pages = kstat$pages;
raw_disk_update();
//raw_disk_list();
}
// Take one complete sample: refresh all statistics and print the disk table.
// The now and tm_now arguments are accepted but not used by this version.
measure_os(long now, tm_t tm_now)
{
  // Refresh every statistic first.
  _measure_os();

  // Mount point reporting is switched off in this version.
  // measure_mounts();

  // Report the disks.
  measure_disk();

  printf ("Measure Os complete\n");
}
// Block until the clock reaches sleep_till.
// The time is checked again after every sleep() call, so a sleep that
// returns early is followed by another one.  Returns at once if sleep_till
// is not in the future.
sleep_till_and_count_new_proceses(long sleep_till)
{
  long current;

  for (current = time(0); current < sleep_till; current = time(0)) {
    sleep(sleep_till - current);
  }
}
#ifdef WATCH_MOUNTS
// Walk the mount table and gather capacity and inode usage for every
// mounted ufs or vxfs file system, skipping mount points under /cdrom/ and
// file systems for which statvfs() fails.
//
// The put_output() calls that would report the values are commented out, so
// this version only computes them.
// NOTE(review): tmp_mnt and mnt$mnt are not declared in the visible part of
// this file; they must be declared before WATCH_MOUNTS is enabled.
measure_mounts()
{
  statvfs_t vfs_array[1];
  statvfs_t vfs;
  string comment_fmt;
  string kbytes_fmt;
  string inode_fmt;
  string percent_fmt;
  ulong kbytes_used;
  ulong inodes_used;
  double block_factor;
  int comment_length;
  int previous_count = -1;
  int current_count;
  current_count = 0;
  // Traverse the mount table to find mounted ufs/vxfs file systems.
  for (mnt$mnt.number$=0; mnt$mnt.number$ != -1; mnt$mnt.number$++) {
    tmp_mnt = mnt$mnt;
    if (tmp_mnt.mnt_fstype == "ufs" || tmp_mnt.mnt_fstype == "vxfs") {
      // Skip locally mounted /cdrom partitions.
      if (tmp_mnt.mnt_mountp =~ "^/cdrom/") {
        continue;
      }
      if (statvfs(tmp_mnt.mnt_mountp, vfs_array) == -1) {
        continue;
      }
      vfs = vfs_array[0];
      ++current_count;
      // Generate the format strings for the comment and for the data.
      comment_fmt = sprintf("mnt%%c_%s", tmp_mnt.mnt_mountp);
      comment_length = strlen(comment_fmt) - 1;
      kbytes_fmt = sprintf("%%%d.0f", comment_length);
      inode_fmt = sprintf("%%%dld", comment_length);
      percent_fmt = sprintf("%%%d.3f", comment_length);
      // Calculate the number of 1 kilobyte blocks on the disk.
      // Divide by 1024.0 so the factor is not truncated to an integer; a
      // fragment size below 1024 bytes would otherwise give a factor of 0.
      block_factor = vfs.f_frsize/1024.0;
      // Capital letters refer to the disk usage in kilobytes. Lower case
      // letters refer to inode usage.
      // C - Capacity of the disk.
      // U - Used capacity.
      // A - Available capacity for non-root users.
      // P - Percent used.
      kbytes_used = vfs.f_blocks - vfs.f_bfree;
      inodes_used = vfs.f_files - vfs.f_ffree;
      //put_output(sprintf(comment_fmt, 'C'),
      // sprintf(kbytes_fmt, block_factor*vfs.f_blocks));
      //put_output(sprintf(comment_fmt, 'U'),
      // sprintf(kbytes_fmt, block_factor*kbytes_used));
      //put_output(sprintf(comment_fmt, 'A'),
      // sprintf(kbytes_fmt, block_factor*vfs.f_bavail));
      //put_output(sprintf(comment_fmt, 'P'),
      // sprintf(percent_fmt, 100.0*kbytes_used/(vfs.f_blocks + vfs.f_bavail - vfs.f_bfree)));
      //put_output(sprintf(comment_fmt, 'c'),
      // sprintf(inode_fmt, vfs.f_files));
      //put_output(sprintf(comment_fmt, 'u'),
      // sprintf(inode_fmt, inodes_used));
      //put_output(sprintf(comment_fmt, 'a'),
      // sprintf(inode_fmt, vfs.f_favail));
      //put_output(sprintf(comment_fmt, 'p'),
      // sprintf(percent_fmt, 100.0*inodes_used/(vfs.f_files + vfs.f_favail - vfs.f_ffree)));
    }
  }
}
#endif
#ifdef WATCH_DISK
// Print a comparison table of disk statistics.
//
// For every RAW_disk entry two rows are printed: the RAW_disk values, marked
// with a leading *, followed by the GLOBAL_disk values at the same index.
// Totals, the peak and the mean of run_percent are accumulated over the
// RAW_disk rows but are not printed by this version.
// NOTE(review): GLOBAL_disk is indexed with the RAW_disk index; confirm the
// two tables have the same size and order.
measure_disk()
{
  double mean_disk_busy;
  double peak_disk_busy;
  double total_reads;
  double total_writes;
  double total_readk;
  double total_writek;
  int d;

  total_reads = 0.0;
  total_writes = 0.0;
  total_readk = 0.0;
  total_writek = 0.0;
  mean_disk_busy = 0.0;
  peak_disk_busy = 0.0;

  printf ("measure_disk\n");
  printf ("device r/s w/s kr/s kw/s wait svc_t w b\n");

  for (d=0; d<RAW_disk_count; d++) {
    // Row 1: statistics from the RAW_disk table, marked with *.
    printf("*%6s", RAW_disk[d].long_name);
    printf("%5.1f", RAW_disk[d].reads);
    printf("%5.1f", RAW_disk[d].writes);
    printf("%10.1f", RAW_disk[d].kreads);
    printf("%10.1f", RAW_disk[d].kwrites);
    printf("%6.1f", RAW_disk[d].avg_wait);
    printf("%6.1f", RAW_disk[d].service);
    printf("%6.1f", RAW_disk[d].wait_percent);
    printf("%6.1f\n", RAW_disk[d].run_percent);

    // Row 2: statistics from the GLOBAL_disk table at the same index.
    printf(" %6s", GLOBAL_disk[d].info.long_name);
    printf("%5.1f", GLOBAL_disk[d].reads);
    printf("%5.1f", GLOBAL_disk[d].writes);
    printf("%10.1f", GLOBAL_disk[d].kreads);
    printf("%10.1f", GLOBAL_disk[d].kwrites);
    printf("%6.1f", GLOBAL_disk[d].avg_wait);
    printf("%6.1f", GLOBAL_disk[d].service);
    printf("%6.1f", GLOBAL_disk[d].wait_percent);
    printf("%6.1f\n", GLOBAL_disk[d].run_percent);

    // Running totals and peak over the RAW_disk entries.
    total_reads = total_reads + RAW_disk[d].reads;
    total_writes = total_writes + RAW_disk[d].writes;
    total_readk = total_readk + RAW_disk[d].kreads;
    total_writek = total_writek + RAW_disk[d].kwrites;
    mean_disk_busy = mean_disk_busy + RAW_disk[d].run_percent;
    if (peak_disk_busy < RAW_disk[d].run_percent) {
      peak_disk_busy = RAW_disk[d].run_percent;
    }
  }

  // Turn the run_percent sum into a mean when there is at least one disk.
  if (RAW_disk_count != 0) {
    mean_disk_busy = mean_disk_busy/RAW_disk_count;
  }

  printf("Measure Disks complete\n");
}
#endif
More information about the Orca-users
mailing list