[Orca-dev] [PATCH] r528/data_gatherers/winallator/SourceFile.pm-patch-with-r528.txt

Mon Apr 10 09:54:07 PDT 2006

Patched by: Dave "Dragon" Michaels <dragon at raytheon.com> Jan-Apr 2006
            Andy Fox July 2002
Review by: Vicky P. Retzke <vpretzke at overnite.com>

* r528/data_gatherers/winallator/SourceFile.pm-patch-with-r528.txt
    - This file replaces SourceFile.pm-patch-with-r520.txt and older
    - the patch-with-r520.txt patch doesn't work on r528
    - confirmed to work on r528 by Vicky P. Retzke
    - Using the 520 patch, the following error will be encountered:
        ./bin/orca: warning: cannot find date 'timestamp' in
        '/users/nrgo600/var/worca/winallator/testdir/winallator_2006040416.tsv'.
    - New patch introduces many additional changes:
       o Windows-specific data cleanup is only run on Windows (perfmon)
         data files (identified by "PDH" in first column).  This allows
         for one orca installation to handle both Unix and Windows files
         safely
       o Removed ambiguity and added comments describing the Windows data
         cleanup process
       o Tolerates Windows EOL(^M) without dos2unix'ing original data file
       o Added verbosity support, removed extraneous prints
       o If Windows data file, Convert Windows integers to floats
       o If Windows data file, Convert Windows blank fields (" ") to 0.0
       o If Windows data file, convert timestamp accordingly

Note: Around line 1060 of the patched file is a weird while/subst
construct (the empty-bodied while loop that turns integers into floats).
It works, but it's stupid (no idea what I was smoking that day).  I have
a fix, but it's untested, so I'm leaving the stupidity in place.  I just
happened to notice it while writing the above change log.

*** SourceFile.pm    Sun Nov 27 22:48:49 2005
--- Winallated-SourceFile.pm    Wed Jan 11 11:35:16 2006
***************
*** 66,71 ****
--- 66,77 ----
  my %my_rrd_list_cache;
  my %choose_data_sub_cache;

+ # boolean to distinguish Windows logfiles
+ # from Unix ones.  Windows logfiles require
+ # special processing.
+ use Time::Local;
+ my $is_windows = 0;
+
  # Use a blessed reference to an array as the storage for this class.
  # Since this class is a subclass of Orca::DataFile, append to the
  # end of the Orca::DataFile array the values needed by this class
***************
*** 172,178 ****
--- 178,250 ----
        chomp($line);
        if ($line) {
          $self->[I_FIRST_LINE] = 1;
+
+         # Windows logfile parsing
+         #   - Dave "Dragon" Michaels, Tue Jan 10 13:40:51 2006
+         #   - Andy Fox, 2jul2002
+         # if this is a Windows/perfmon file, it needs to be orcafied
+         # The first line of a Windows/perfmon file has
+         # "(PDH-{blah}) ({time-blah})(blah)"
+         # in the first field. (including the quotes!)
+         # Windows perfmon files are tab-separated
+         if ( $line =~ m/^\"\(PDH/ ) {
+             # this must be a Windows log file.
+             $is_windows = 1;
+
+             # print the pre-processed line to the log file, if verbose
+             if ($opt_verbose) { print "raw header line: $line\n"; }
+
+             # fix field names:
+             #   - spaces in fields become _'s
+             $line =~ s/ /_/g;
+
+             #   - delete matches to \\blah\ (that's the machine
+             #     name, which is already represented by the filename,
+             #     and would cause machine-specific fields where
+             #     such isn't necessary.  E.g.:
+             #     \\MAMMATU06\Processor(0)\% Processor Time
+             $line =~ s/\\\\[^\\ ]+\\//g;
+
+             #   - get rid of quotes; they're extraneous at this point
+             $line =~ s/\"//g;
+            
+             #   - turn \'s into _'s, since \ has special meaning and
+             #     would not translate well when embedded in html filenames.
+             $line =~ s/\\/_/g;
+
+             #   - replace ?'s with nothing, for similar reasons as \
+             $line =~ s/\?//g;
+
+             #   - replace ('s and )'s with _, since () has special meaning
+             #     in the perl regexps, and would just confuse things
+             $line =~ s/\(/_/g;
+             $line =~ s/\)/_/g;
+            
+             #   - nuke any ,'s or :'s in fields
+             $line =~ s/,//g;
+             $line =~ s/://g;
+
+             #   - reduce extra __'s caused by above substitutions to one _,
+             #     for a more legible field name.  Also, remove leading _'s
+             $line =~ s/[_]+/_/g;
+             $line =~ s/^_//g;
+             $line =~ s/(\s)_/$1/g;
+
+             #   - nuke any ^M's (typically at the end of the line)
+             $line =~ s/\015//g;
+             # print the resultant line to the log file, if verbosity is engaged
+             if ($opt_verbose) { print "processed header line: $line\n"; }
+         }
+         # just in case the unix file is tab-separated, it's now safe
+         # to convert tabs to spaces
+         $line =~ s/\t/ /g;
          @column_description = split(' ', $line);
+         if ($is_windows == 1) {
+             # the windows timestamp field is a funky string;
+             # make it the Orca-standard 'timestamp' string, for
+             # later parsing
+             $column_description[0]="timestamp";
+         }
        } else {
          warn "$0: warning: no first_line for '$filename' yet.\n";
          $open_file_cache->close($fid) or
***************
*** 963,970 ****
      # Skip the line if the word timestamp appears in it.  This is a
      # temporary fix for orcallator.se to place a new information line
      # in the output file when it starts up.
!     next if $line =~ /timestamp/;

      my @line = split(' ', $line);

      # Skip this input line if 1) the file uses the first line to
--- 1035,1071 ----
      # Skip the line if the word timestamp appears in it.  This is a
      # temporary fix for orcallator.se to place a new information line
      # in the output file when it starts up.
!     # Windows files have "PDH-TSV" instead of "timestamp"
!     next if $line =~ /timestamp|PDH-TSV/;

+     if ($opt_verbose) { print "raw data line: $line\n"; };
+     if ( $is_windows == 1 ) {
+         # strip the linefeed
+         chomp($line);
+         # remove ^M (CR)
+         $line =~ s/\015//g;
+         # perfmon sometimes generates empty fields, as " "
+         # replace them with a value, 0.0
+         $line =~ s/\" \"/0.0/g;
+         # now there should only be tabs and numbers (all in quotes), but just
+         # in case there are any spaces left, turn them into :'s
+         $line =~ s/ /:/g;
+         # The rest of the fields are surrounded by "s, so nuke 'em all
+         $line =~ s/\"//g;
+         # perfmon sometimes generates integer fields.  Orca expects
+         # floats.  So, turn each integer into a float by adding .0
+         # This seems like a clumsy way to do this, but it works (is
+         # there a better way?)
+         $_ = $line;
+         while (s/(\s|^)(\d+)(\s)/$1$2.0$3/g) {
+         }
+         $line = $_;
+         if ($opt_verbose) { print "parsed Windows data line: $line\n"; };
+     }
+
+     # just in case the unix file is tab-separated, it's now safe
+     # to convert tabs to spaces
+     $line =~ s/\t/ /g;
      my @line = split(' ', $line);

      # Skip this input line if 1) the file uses the first line to
***************
*** 984,989 ****
--- 1085,1133 ----
      } else {
        $time = $line[$date_column_index];
      }
+
+     if ($is_windows == 1) {
+         # Windows timestamp and Unix timestamp are different.
+         # Convert Windows timestamp to Unix-style timestamp, for
+         # consistency in later computations
+         # At this stage the date is in this format: 06/18/2002 21:56:06.096
+         # First, turn /s and .s into :s
+         $time =~ s/\//:/g;
+         $time =~ s/\./:/;
+         # Now we have this: "06:18:2002:21:56:06:096"
+         # Break it up, and construct a unix timestamp out of it
+         my @wintime = split(':', $time);
+         my ($sec, $min, $hr, $day, $mon, $yr);
+
+         $mon = $wintime[0];
+         $day = $wintime[1];
+         $yr = $wintime[2];
+         $hr = $wintime[3];
+         $min = $wintime[4];
+         $sec = $wintime[5];
+
+         if ($opt_verbose) {
+             print "parsing windows time @wintime into parts:\n";
+             print "mon = $mon\n";
+             print "day = $day\n";
+             print "yr = $yr\n";
+             print "hr = $hr\n";
+             print "min = $min\n";
+             print "sec = $sec\n";
+         }
+         $mon -= 1;
+         $yr -= 1900;
+         if ($opt_verbose) {
+             print "modified mon = $mon\n";
+             print "modified yr = $yr\n";
+         }
+
+         my ($blur);
+         $blur = timelocal($sec, $min, $hr, $day, $mon, $yr);
+         $time = $blur;
+         if ($opt_verbose) { print "resultant unix timestamp is: 
$time\n"; }
+       }
+
      $last_data_time = $time if $time > $last_data_time;

      # If the file status from the source data file is greater than