[Orca-users] Re: disk run doesn't show all disks

Blair Zajac blair at gps.caltech.edu
Tue Jun 19 12:43:15 PDT 2001


Mike,

Could you try the attached code to help track down the problem
you're seeing?

From the author of the new RAWDISK routines:

"I am enclosing the code that I used to debug the RAWDISK routines.  It
 emulates either iostat or sar -d (I am not sure which.)

 If you could get the person to run it we could isolate if the problem
 is with my code."

Regards,
Blair

Mike Jackson wrote:
> 
> Blair Zajac wrote:
> >
> > It's not obvious why this is happening.  Try removing the line
> >
> > #define USE_RAWDISK             1
> >
> > from orcallator.se and see if this makes a difference.
> 
> Ok, I did that and now it shows up in the percol-* files. Orca still
> won't create a RRD for it, and I don't know how to do that myself. So,
> it still doesn't show up in the graphs.
> 
> Thanks,
> Mike
-------------- next part --------------
#include <stdio.se>
#include <stdlib.se>
#include <unistd.se>
#include <string.se>
#include <time.se>
#include <kstat.se>
#include <sys_kstat.se>
#include <utsname.se>
#include <p_iostat_class.se>
#include <p_netstat_class.se>
#include <p_vmstat_class.se>
#include <pure_rules.se>
#include <live_rules.se>
#include <mib.se>
#include <tcp_class.se>
#include <tcp_rules.se>
#include <mnt_class.se>
#include <statvfs.se>
#include <proc.se>
#include <fcntl.se>
#include <stat.se>

// Not referenced anywhere in the visible part of this script.
#define DEFAULT_MAXPID 30000
// Define the different parts of the system you want to examine.
// WATCH_DISK guards the definition of measure_disk() further down.
#define WATCH_OS 	1
#define WATCH_DISK      1 

// The default sampling interval in seconds.
#define SAMPLE_INTERVAL         10
// The maximum number of columns of data.
// (Not referenced anywhere in the visible part of this script.)
#define MAX_COLUMNS             512
// Sampling interval actually used by main() when computing wake-up times.
long interval = SAMPLE_INTERVAL;

// Set in main(): node name from uname() and argv[0] respectively.
string nodename;
string program_name;
// Set in initialize() from sysconf(_SC_PAGESIZE).
int page_size;
// Set in initialize(): time(0) minus (kstat$misc.clk_intr / hz).
long boot_time = 0;
// Set in initialize() from sysconf(_SC_CLK_TCK).
int hz;

// Each "$"-named variable below is paired with a tmp_ variable of the
// same type; _measure_os() copies the "$" variable into its tmp_ partner
// once per sample.  kstat$misc has no partner: it is only read in
// initialize() to compute boot_time.
// NOTE(review): the "$" names look like SE toolkit active variables that
// are refreshed when read -- confirm against the SE documentation.
ks_system_pages kstat$pages;
ks_system_pages tmp_kstat_pages;
ks_system_misc  kstat$misc;
lr_cpu_t        lr_cpu$cpu;
lr_cpu_t        tmp_lrcpu;
lr_mutex_t      lr_mutex$m;
lr_mutex_t      tmp_mutex;
lr_net_t        lr_net$nr;
lr_net_t        tmp_nr;
lr_tcp_t        lr_tcp$tcp;
lr_tcp_t        tmp_lrtcp;
lr_rpcclient_t  lr_rpcclient$r;
lr_rpcclient_t  tmp_lrpcc;
lr_disk_t       lr_disk$dr;
lr_disk_t       tmp_dr;
lr_dnlc_t       lr_dnlc$dnlc;
lr_dnlc_t       tmp_lrdnlc;
lr_inode_t      lr_inode$inode;
lr_inode_t      tmp_lrinode;
lr_ram_t        lr_ram$ram;
lr_ram_t        tmp_lrram;



// Per-device record kept by raw_disk_update().  The first group of fields
// holds the rates derived for the most recent sample; the underscore
// fields hold the raw kstat counters saved from the previous sample so
// that the next sample can compute deltas.
struct  RawDisk {
    char long_name[16];	// sysv5 disk name; starts out as a copy of short_name
			// until raw_disk_map() finds a match
    char short_name[8];	// bsd device name (copied from the kstat ks_name)
    double reads;              // read operations per second
    double kreads;             // kilobytes read per second
    double writes;             // write operations per second
    double kwrites;            // kilobytes written per second
    double avg_wait;           // wait-queue length*time delta per operation
    double avg_serv;           // run-queue length*time delta per operation
    double service;            // avg_wait + avg_serv
    double wait_percent;       // 100 * wait time delta / elapsed time
    double run_percent;        // 100 * run time delta / elapsed time
   
    // hidden internal registers to track kstats fields
    int	_number;	    // kstat disk # (never assigned in the visible code)
    ulonglong _nread;       // number of bytes read 
    ulonglong _nwritten;    // number of bytes written
    uint   _reads;	    // number of read operations
    uint   _writes;	    // number of write operations
    longlong _wtime;        // cumulative wait (pre-service) time
    longlong _wlentime;     // cumulative wait length*time product
    longlong _wlastupdate;  // last time wait queue changed
    longlong _rtime;        // cumulative run (service) time
    longlong _rlentime;     // cumulative run length*time product
    longlong _rlastupdate;  // last time run queue changed
    uint     _wcnt;         // count of elements in wait state
    uint     _rcnt;         // count of elements in run state
};

// Table of known devices; entries 0 .. RAW_disk_count-1 are in use.
RawDisk  RAW_disk[256];

int	RAW_disk_map=0;        // Flag: set to 1 by raw_disk_update() when a new device
                               // is added, which makes it call raw_disk_map() to map
                               // short device names to long device names
int	RAW_disk_count=0;      // No. of devices known
double   RAW_disk_lastupdate;  // Last time device stats were updated (seconds, from gettimeofday)



// Map short (bsd-style) device names to long (cXtXdX-style) device names.
//
// Walks GLOBAL_disk_info[] and, for every entry whose long name starts
// with 'c' and contains no 's' (i.e. is not a slice), looks for a
// RAW_disk[] entry with the same short name and copies the long name
// into it.  RAW_disk[] entries with no match keep whatever long_name
// they already had.
//
// On return the RAW_disk_map flag is cleared so that raw_disk_update()
// only calls this function again after it has discovered a new device.
// (The flag used to be left at 1 here, which caused a full rescan on
// every sample.)
raw_disk_map() {
   int i;
   int j=0;
   char long_name[16];
   char short_name[16];

   //printf ("disk_map");

   // 2.6 appears to have a bug where MAX_DISK is +10 too large.
   // Actually this is a workaround for the A1000 rm6 raid manager.
   //
   // This does not affect finding physical devices.  We just miss a few
   // slices cXtxd0sX and pseudo disks md??
   //for (i=0; i < MAX_DISK - 22; i++) {
   //
   // NOTE(review): the scan below is bounded by RAW_disk_count, not by the
   // number of GLOBAL_disk_info[] entries.  If GLOBAL_disk_info[] holds
   // more entries than RAW_disk[] (slices, pseudo disks), physical disks
   // stored past index RAW_disk_count-1 are never mapped -- confirm the
   // intended bound.
   for (i=0; i < RAW_disk_count; i++) {
     strcpy(long_name,GLOBAL_disk_info[i].long_name);
     if ( long_name[0] == 'c' &&  strchr(long_name, 's') == nil) {
       strcpy(short_name,GLOBAL_disk_info[i].short_name);
       //printf ("Map [%d] %s %s\n", i, short_name, long_name);
       for (j=0; j < RAW_disk_count;j++) {
          if (  strcmp(RAW_disk[j].short_name, short_name) == 0) {
                strcpy(RAW_disk[j].long_name, long_name);
                //printf ("Found [%d] %s %s %s\n",j, RAW_disk[j].short_name, short_name, long_name);
                break;
          }
       }
     }
   }
   // Mapping is done; clear the "rescan needed" flag.
   RAW_disk_map = 0;
}


// Refresh RAW_disk[] from the kernel kstat chain.
//
// For every kstat of type KSTAT_TYPE_IO whose name does not start with
// 'm' or 'n' and does not contain a ',', the matching RAW_disk[] entry is
// located by short name (or appended if it is new), the kstat_io_t data
// is read, and the per-second rates, average wait/service figures and
// wait/run percentages are recomputed from the difference between the
// new counters and the counters saved on the previous call.  The new
// counters are then saved for the next call.
//
// Exits the program (after perror) if kstat_read fails.  If any new
// device was added, raw_disk_map() is called at the end to look up long
// device names.
//
// Changes from the earlier revision of this routine:
//   - _rlastupdate is now taken from kio.rlastupdate (it was copied from
//     kio.wlastupdate).
//   - a new device is only appended while RAW_disk[] (256 entries) has
//     room; further devices are skipped instead of writing past the end.
//   - _rtime and _wtime are reset together with the other counters when
//     a slot is first used.
//   - the unused local "delta" was removed.
//
// NOTE(review): short_name is 8 bytes but is filled with strcpy from
// ks_name; a longer kstat name would overrun it -- confirm the maximum
// name length seen on the target systems.
raw_disk_update() {
  int rdisk;
  ulong ul;
  kstat_ctl_t kc[1];
  kstat_t kp[1];
  kstat_t nkp[1];
  kstat_io_t kio;
  ulonglong _nread;
  ulonglong _nwritten;
  uint _reads;
  uint _writes;
  longlong _wtime;
  longlong _wlentime;
  longlong _wlastupdate;
  longlong _rtime;
  longlong _rlentime;
  longlong _rlastupdate;
  longlong _wcnt;
  longlong _rcnt;

  double read_writes;
  double big_etime;
  double elapsed_etime;
  double hz_etime;
  double nanosecond = NANOSEC;
  double update;
  timeval_t  time_update[1];
  ulong  time_void;

  char  short_name[8];

  printf ("disk_update\n");

  // Record the wall-clock time of this update (seconds, with fraction).
  gettimeofday(time_update,time_void);
  update = time_update[0].tv_sec + (time_update[0].tv_usec / 1000000.0);
  RAW_disk_lastupdate = update;

  kc[0] = kstat_open();
  // read them
  if (kstat_read(kc,kp,0) == -1) {
    perror("raw_disk_update:kstat_read");
    exit(1);
  }
  // traverse the chain looking for IO events
  for (ul=kc[0].kc_chain; ul !=0; ul=nkp[0].ks_next) {
    struct_fill(nkp[0],ul);
    if ( nkp[0].ks_type == KSTAT_TYPE_IO) {
      strcpy(short_name, nkp[0].ks_name);
      if (  short_name[0] != 'm' &&  short_name[0] != 'n'  &&  strchr(short_name,',') == nil) {
        // Try to locate device
        for ( rdisk=0; rdisk < RAW_disk_count;rdisk++) {
          if (strcmp(RAW_disk[rdisk].short_name, short_name) == 0)  {break; }
        }

        // It must be new. Add it!
        if ( rdisk == RAW_disk_count) {
          // RAW_disk[] has 256 entries; skip devices that do not fit.
          if (RAW_disk_count >= 256) {
            continue;
          }
          // must be a tape drive or something else
          RAW_disk_map = 1;			// schedule device name map cycle
          strcpy(RAW_disk[rdisk].long_name, short_name);
          strcpy(RAW_disk[rdisk].short_name, short_name);
          RAW_disk[rdisk]._reads=0;
          RAW_disk[rdisk]._nread=0;
          RAW_disk[rdisk]._rlentime=0;
          RAW_disk[rdisk]._rtime=0;
          RAW_disk[rdisk]._rlastupdate= boot_time;
          RAW_disk[rdisk]._rcnt = 0;
          RAW_disk[rdisk]._writes=0;
          RAW_disk[rdisk]._nwritten=0;
          RAW_disk[rdisk]._wlentime=0;
          RAW_disk[rdisk]._wtime=0;
          RAW_disk[rdisk]._wlastupdate= boot_time;
          RAW_disk[rdisk]._wcnt = 0;
          RAW_disk_count++;
        }

        // update the device registers
        if (kstat_read(kc,nkp,0) == -1) {
          perror("raw_disk_update:kstat_read error");
          exit(1);
        } else {
          // Read sys_kstat device IO Queue to find out about recent activity.
          // Check that valid data was returned for the device.  Solaris 2.6 has
          // occasional glitches when examining certain disks (c0t0d0), so we
          // cover them up by using the data from the previous cycle.
          // Eventually, we will get an update.  So far only kio.nread comes
          // back as zero.  This approach probably needs more thought!
          struct_fill(kio,nkp[0].ks_data);
          _nread  =  kio.nread;
          if ( RAW_disk[rdisk]._nread > _nread ) {_nread = RAW_disk[rdisk]._nread;}
          _reads = kio.reads;
          if ( RAW_disk[rdisk]._reads > _reads ) {_reads = RAW_disk[rdisk]._reads;}
          _rlentime = kio.rlentime;
          _rtime = kio.rtime;
          _rlastupdate = kio.rlastupdate;
          _rcnt = kio.rcnt;
          _nwritten = kio.nwritten;
          if (RAW_disk[rdisk]._nwritten > _nwritten) {_nwritten = RAW_disk[rdisk]._nwritten;}
          _writes = kio.writes;
          if (RAW_disk[rdisk]._writes > _writes)  { _writes = RAW_disk[rdisk]._writes;}
          _wlentime = kio.wlentime;
          _wtime = kio.wtime;
          _wlastupdate = kio.wlastupdate;
          _wcnt = kio.wcnt;

          // Elapsed time is measured between wait-queue update stamps.
          // hz_etime is that interval divided by NANOSEC; big_etime folds
          // in the 1024 divisor used for the kilobyte rates.  When the
          // stamp has not advanced, fall back to elapsed_etime = NANOSEC.
          elapsed_etime = (_wlastupdate - RAW_disk[rdisk]._wlastupdate);
          if ( elapsed_etime > 0)  {
            hz_etime = elapsed_etime / nanosecond;
            big_etime = 1024.0 * hz_etime;
          } else {
            elapsed_etime = nanosecond;
            hz_etime = 1.0;
            big_etime = 1024.0;
          }
          RAW_disk[rdisk].reads=(_reads - RAW_disk[rdisk]._reads) /hz_etime;
          RAW_disk[rdisk].kreads=(_nread - RAW_disk[rdisk]._nread) / big_etime;
          RAW_disk[rdisk].writes=(_writes - RAW_disk[rdisk]._writes) /hz_etime;
          RAW_disk[rdisk].kwrites=(_nwritten - RAW_disk[rdisk]._nwritten) / big_etime;

          read_writes=  elapsed_etime * (RAW_disk[rdisk].reads + RAW_disk[rdisk].writes) / 1024.0;
          if (read_writes > 0) {
            RAW_disk[rdisk].avg_wait=(_wlentime - RAW_disk[rdisk]._wlentime) / read_writes;
            RAW_disk[rdisk].avg_serv=(_rlentime - RAW_disk[rdisk]._rlentime) / read_writes;
            RAW_disk[rdisk].service = RAW_disk[rdisk].avg_wait + RAW_disk[rdisk].avg_serv;
          } else {
            RAW_disk[rdisk].avg_wait=0.0;
            RAW_disk[rdisk].avg_serv=0.0;
            RAW_disk[rdisk].service =0.0;
          }
          // Update the Counters
          RAW_disk[rdisk].run_percent= 100.0 * (_rtime  - RAW_disk[rdisk]._rtime) / elapsed_etime;
          RAW_disk[rdisk].wait_percent= 100.0 * (_wtime - RAW_disk[rdisk]._wtime) / elapsed_etime;
          RAW_disk[rdisk]._writes=_writes;
          RAW_disk[rdisk]._nwritten=_nwritten;
          RAW_disk[rdisk]._wlastupdate = _wlastupdate;
          RAW_disk[rdisk]._wlentime=_wlentime;
          RAW_disk[rdisk]._wtime=_wtime;
          RAW_disk[rdisk]._wcnt=_wcnt;
          RAW_disk[rdisk]._reads=_reads;
          RAW_disk[rdisk]._nread=_nread;
          RAW_disk[rdisk]._rlastupdate = _rlastupdate;
          RAW_disk[rdisk]._rlentime=_rlentime;
          RAW_disk[rdisk]._rtime=_rtime;
          RAW_disk[rdisk]._rcnt=_rcnt;
          // printf ("Update %d %s %s nread:%lld\n",rdisk, RAW_disk[rdisk].long_name, RAW_disk[rdisk].short_name, RAW_disk[rdisk]._nread);
        }
      }
    }
  }
  kstat_close(kc);
  // Look up Long device Names for any drives that we just discovered
  if ( RAW_disk_map == 1) {
    raw_disk_map();
  }

}


// Program entry point: record the node and program names, run the
// one-time initialization, then loop forever taking one measurement per
// sampling interval.
int main(int argc, string argv[])
{
  utsname_t uts[1];
  long      current;      // Current time in seconds.
  long      wake_at;      // Time to sleep to.
  tm_t      broken_down;  // Local-time breakdown of current.

  // Remember which machine and which program this is.
  uname(uts);
  nodename     = uts[0].nodename;
  program_name = argv[0];

  // Set up the global state and take the first sample.
  initialize();

  // Sample forever.
  for (;;) {
    // Pick the next wake-up time: an integer multiple of the interval
    // that lies at least half an interval in the future.
    current = time(0);
    wake_at = (current/interval)*interval;
    while (wake_at < current + interval*0.5) {
      wake_at = wake_at + interval;
    }
    sleep_till_and_count_new_proceses(wake_at);

    // Take the measurement, stamped with the time we woke up.
    current     = time(0);
    broken_down = localtime(&current);

    measure_os(current, broken_down);

  }
  return 0;
}

// One-time start-up: wait for the disk statistics to settle, cache the
// page size and clock tick rate, derive the boot time and take the first
// system measurement.
initialize()
{
#ifdef WATCH_CPU
  int i;
#endif

  // Give the disks a chance to update before the first sample.
  sleep(DISK_UPDATE_RATE);

  // Cache the page size and the clock tick rate.
  page_size = sysconf(_SC_PAGESIZE);
  hz        = sysconf(_SC_CLK_TCK);

  // Boot time = now minus (clock interrupts since boot / ticks per second).
  boot_time = time(0) - (kstat$misc.clk_intr / hz);

  // Take the first measurement of the system.
  _measure_os();
  printf ("initialize complete\n");
}






// Measure the system statistics all at once.
//
// Copies each "$"-named statistics variable into its tmp_ counterpart and
// then refreshes the raw disk table with raw_disk_update().
// NOTE(review): the "$" names look like SE toolkit active variables that
// are refreshed when read -- confirm against the SE documentation.  The
// inode statistics used to be copied twice in a row; the redundant second
// copy has been removed so each variable is read once per sample.
_measure_os()
{
  tmp_lrcpu         = lr_cpu$cpu;
  tmp_mutex         = lr_mutex$m;
  tmp_nr            = lr_net$nr;
  tmp_lrtcp         = lr_tcp$tcp;
  tmp_lrpcc         = lr_rpcclient$r;
  tmp_dr            = lr_disk$dr;
  tmp_lrdnlc        = lr_dnlc$dnlc;
  tmp_lrinode       = lr_inode$inode;
  tmp_lrram         = lr_ram$ram;
  tmp_kstat_pages   = kstat$pages;
  raw_disk_update();
  //raw_disk_list();
}

// Take one full measurement pass: refresh the system statistics with
// _measure_os(), print the disk report with measure_disk(), then print a
// completion message.  The now and tm_now parameters are not used in
// this function's body.
measure_os(long now, tm_t tm_now)
{
  // Measure the system now.
  _measure_os();

  // Take care of mount points (currently disabled).
  // measure_mounts();

  // Take care of the disks.
  measure_disk();
  printf ("Measure Os  complete\n");
}


// Block until the clock reaches sleep_till (seconds, as returned by
// time(0)).  sleep() is re-issued with the remaining time until the
// target is reached; returns immediately if it has already passed.
sleep_till_and_count_new_proceses(long sleep_till)
{
  long   current;

  for (current = time(0); current < sleep_till; current = time(0)) {
    sleep(sleep_till - current);
  }
}

#ifdef WATCH_MOUNTS
// Walk the mount table and compute usage figures for every mounted ufs
// or vxfs file system, skipping anything mounted under /cdrom/ and any
// mount point for which statvfs() fails.
//
// All of the put_output() calls are commented out, so in this debugging
// script the function computes its values and discards them.  It is only
// compiled when WATCH_MOUNTS is defined.
//
// block_factor is now computed with a floating-point division: the
// integer division used before truncated to 0 for fragment sizes below
// 1024 bytes (e.g. 512), which would zero every kilobyte figure.
//
// NOTE(review): tmp_mnt is not declared anywhere in the visible part of
// this file -- confirm it is provided elsewhere before enabling
// WATCH_MOUNTS.
measure_mounts()
{
  statvfs_t vfs_array[1];
  statvfs_t vfs;
  string    comment_fmt;
  string    kbytes_fmt;
  string    inode_fmt;
  string    percent_fmt;
  ulong     kbytes_used;
  ulong     inodes_used;
  double    block_factor;
  int       comment_length;
  int       previous_count = -1;
  int       current_count;

  current_count = 0;
  // Traverse the mount table to find mounted ufs/vxfs file systems.
  for (mnt$mnt.number$=0; mnt$mnt.number$ != -1; mnt$mnt.number$++) {
    tmp_mnt = mnt$mnt;
    if (tmp_mnt.mnt_fstype == "ufs" || tmp_mnt.mnt_fstype == "vxfs") {
      // Skip locally mounted /cdrom partitions.
      if (tmp_mnt.mnt_mountp =~ "^/cdrom/") {
        continue;
      }
      if (statvfs(tmp_mnt.mnt_mountp, vfs_array) == -1) {
        continue;
      }
      vfs = vfs_array[0];
      ++current_count;

      // Generate the format strings for the comment and for the data.
      // The data columns are made as wide as the comment (column name).
      comment_fmt    = sprintf("mnt%%c_%s", tmp_mnt.mnt_mountp);
      comment_length = strlen(comment_fmt) - 1;
      kbytes_fmt     = sprintf("%%%d.0f",   comment_length);
      inode_fmt      = sprintf("%%%dld",    comment_length);
      percent_fmt    = sprintf("%%%d.3f",   comment_length);

      // Number of kilobytes per file system fragment.  Use a floating
      // point divisor so fragment sizes below 1024 bytes are not
      // truncated to zero.
      block_factor = vfs.f_frsize/1024.0;

      // Capital letters refer to the disk usage in kilobytes.  Lower case
      // letters refer to inode usage.
      // C - Capacity of the disk.
      // U - Used capacity.
      // A - Available capacity for non-root users.
      // P - Percent used.
      kbytes_used = vfs.f_blocks - vfs.f_bfree;
      inodes_used = vfs.f_files  - vfs.f_ffree;

      //put_output(sprintf(comment_fmt, 'C'),
      //          sprintf(kbytes_fmt, block_factor*vfs.f_blocks));
      //put_output(sprintf(comment_fmt, 'U'),
      //           sprintf(kbytes_fmt, block_factor*kbytes_used));
      //put_output(sprintf(comment_fmt, 'A'),
      //          sprintf(kbytes_fmt, block_factor*vfs.f_bavail));
      //put_output(sprintf(comment_fmt, 'P'),
      //          sprintf(percent_fmt, 100.0*kbytes_used/(vfs.f_blocks + vfs.f_bavail - vfs.f_bfree)));

      //put_output(sprintf(comment_fmt, 'c'),
      //           sprintf(inode_fmt, vfs.f_files));
      //put_output(sprintf(comment_fmt, 'u'),
      //           sprintf(inode_fmt, inodes_used));
      //put_output(sprintf(comment_fmt, 'a'),
      //           sprintf(inode_fmt, vfs.f_favail));
      //put_output(sprintf(comment_fmt, 'p'),
      //           sprintf(percent_fmt, 100.0*inodes_used/(vfs.f_files + vfs.f_favail - vfs.f_ffree)));
    }
  }

}
#endif

#ifdef WATCH_DISK
// Print a side-by-side disk report: for each index below RAW_disk_count,
// one line from RAW_disk[] (marked with '*') followed by one line from
// GLOBAL_disk[] at the same index.  Totals, the peak and the mean busy
// percentage are accumulated from the RAW_disk[] figures but are not
// printed.
measure_disk()
{
  double mean_disk_busy = 0.0;
  double peak_disk_busy = 0.0;
  double total_reads    = 0.0;
  double total_writes   = 0.0;
  double total_readk    = 0.0;
  double total_writek   = 0.0;
  int    d;

  printf ("measure_disk\n");
  printf ("device  r/s  w/s      kr/s      kw/s  wait svc_t   w     b\n");
  for (d=0; d<RAW_disk_count; d++) {
    // RAW_disk statistics, marked with a leading '*'.
    printf("*%6s", RAW_disk[d].long_name);
    printf("%5.1f", RAW_disk[d].reads);
    printf("%5.1f", RAW_disk[d].writes);
    printf("%10.1f", RAW_disk[d].kreads);
    printf("%10.1f", RAW_disk[d].kwrites);
    printf("%6.1f", RAW_disk[d].avg_wait);
    printf("%6.1f", RAW_disk[d].service);
    printf("%6.1f", RAW_disk[d].wait_percent);
    printf("%6.1f\n", RAW_disk[d].run_percent);

    // Running totals over the RAW_disk figures.
    total_reads    = total_reads    + RAW_disk[d].reads;
    total_writes   = total_writes   + RAW_disk[d].writes;
    total_readk    = total_readk    + RAW_disk[d].kreads;
    total_writek   = total_writek   + RAW_disk[d].kwrites;
    mean_disk_busy = mean_disk_busy + RAW_disk[d].run_percent;
    if (RAW_disk[d].run_percent > peak_disk_busy) {
      peak_disk_busy = RAW_disk[d].run_percent;
    }

    // Standard disk statistics for the same index, for comparison.
    printf(" %6s", GLOBAL_disk[d].info.long_name);
    printf("%5.1f", GLOBAL_disk[d].reads);
    printf("%5.1f", GLOBAL_disk[d].writes);
    printf("%10.1f", GLOBAL_disk[d].kreads);
    printf("%10.1f", GLOBAL_disk[d].kwrites);
    printf("%6.1f", GLOBAL_disk[d].avg_wait);
    printf("%6.1f", GLOBAL_disk[d].service);
    printf("%6.1f", GLOBAL_disk[d].wait_percent);
    printf("%6.1f\n", GLOBAL_disk[d].run_percent);
  }

  // Turn the busy sum into a mean when there is at least one device.
  if (RAW_disk_count != 0) {
    mean_disk_busy = mean_disk_busy/RAW_disk_count;
  }
  printf("Measure Disks complete\n");
}
#endif


More information about the Orca-users mailing list