Newsgroups: comp.lang.perl
Path: utzoo!utgpu!news-server.csri.toronto.edu!rpi!batcomputer!theory.tn.cornell.edu!lijewski
From: lijewski@theory.tn.cornell.edu (Mike Lijewski)
Subject: excessive perl memory usage
Message-ID: <1991May2.212216.24563@batcomputer.tn.cornell.edu>
Originator: lijewski@theory.tn.cornell.edu
Sender: news@batcomputer.tn.cornell.edu
Nntp-Posting-Host: theory.tn.cornell.edu
Organization: Cornell National Supercomputer Facility
Date: Thu, 2 May 1991 21:22:16 GMT


Perl users,

Appended is a script called 'governor' which I'm working on.  The
intent is to monitor the usage of one of our frontend machines for
heavy usage, with the intent of niceing or killing cpu bound
processes which should be running on our backend machines.  While
running, I've seen the perl process grow to roughly 10Mbytes on our
IBM 3090 running AIX/370.  The version of perl is 3.44.  I would
appreciate it if anyone could tell me why it is so memory
inefficient.  A typical 'ps -ef' returns 150 or so lines on the
machine.  Thanks.


#!/usr/local/bin/perl

#
# Subroutine used with "sort" to do a numerical sort on the pid field
# of the 'ps -ef' output, which is the second field.  Note that even though
# the pid is in field #2, we are actually checking the "third" field here
# since Perl numbers arrays beginning at 0.  The first blank delimited field
# returned by "split" is null in our case since there is always some 
# whitespace preceding the user name.
#
sub pid { 
    local(@a) = split(/[ \t]+/, $a);  # split on whitespace
    local(@b) = split(/[ \t]+/, $b);  # split on whitespace
    $a[2] <=> $b[2];
}

#
# This subroutine calls "ps -ef", deletes all processes owned by root, and
# then sorts it by pid.
#
sub get_sorted_ps {

    open (PS, 'ps -ef |') || die "Couldn't open ps pipe: $!";
    #open (PS, 'ps |') || die "Couldn't open ps pipe: $!";
    
    @ps2 = <PS>;  # slurp up the ps output
    
    shift @ps2;   # chop off the header

    # Delete root processes and sort by pid.
    @ps2 = sort pid grep(!/^ *root/, @ps2);

    close (PS);
}

#
# This subroutine finds those processes using "too much" CPU time.
#
sub find_bad_dudes {
    local(@merged_pids) = sort pid (@ps1, @ps2);       # merge old and new
    local(@line1) = split(/[ \t]+/, $merged_pids[0]);  # parse first element

    for ($i = 1; $i <= $#merged_pids; $i++) {          # loop through lines
        local(@line2) = split(/[ \t]+/, $merged_pids[$i]);
        # if pids are identical and time fields are different
        if (($line1[2] == $line2[2]) && ($line1[7] ne $line2[7])) {
            # found a potential bad dude
            local(@time1) = split(/:/,$line1[7]);
            local(@time2) = split(/:/,$line2[7]);
            local($cpu_rate) = ((60 * $time2[0] + $time2[1])
                             - (60 * $time1[0] + $time1[1])) / $sleep_interval;
            # make sure cpu rate is positive
            if ($cpu_rate < 0) { $cpu_rate = -$cpu_rate; }
            if ($cpu_rate > $cpu_threshold) {  # we've found a cpu burner
                print "BURNER: @line2";
            }
        }
        @line1 = @line2;  # update last line
    }
}

#
# ********** main routine **********
#

#
# global variables
#
$sleep_interval = 10;   # how long we sleep between checking process statictics
$cpu_threshold = 0.01; # what we consider an unreasonable amount of cpu usage
@ps1 = ();          # contains old "ps" output
@ps2 = ();          # contains new "ps" output

print 'starting...\n';
&get_sorted_ps;  # get most recent process statistics

for (;;) {  # do forever
    sleep $sleep_interval;
    print 'waking up...\n';
    @ps1 = @ps2;     # save previous process statistics
    &get_sorted_ps;  # get most recent process statistics
    &find_bad_dudes;
    #last;
}
exit(0);
-- 
Mike Lijewski  (H)607/272-0238 (W)607/254-8686
Cornell National Supercomputer Facility
ARPA: mjlx@eagle.cnsf.cornell.edu  BITNET: mjlx@cornellf.bitnet
SMAIL:  25 Renwick Heights Road, Ithaca, NY  14850
