I got tired of finding gigantic tumor-like growths left over from the prior sysadmin's lack of filesystem caretaking, so I wrote this little script. When invoked, it sniffs around for bloated files and emails me a list of the main offenders. In this case, it looks for the 100 largest files above 4MB (4096K, i.e. 8192 512-byte blocks). Feel free to tweak to your heart's delight.
#!/usr/bin/perl
###
### BloatDetector.pl v0.1 written 031405:1749 by BJP
###
### Builds a report showing the top 100 biggest-sized files, excluding
### oracle stuff, backups, etc... That's what the back|archiv|ora
### exclusion stuff does. :)
###
use strict;
use warnings;
use Mail::Sendmail;

###
### Main script: scan the whole filesystem for oversized files, build a
### plain-text report of the worst offenders and mail it to $recipient.
###

# --- Tunables ---------------------------------------------------------
my $recipient  = "youremailaddress\@goes.here.com";
my $min_blocks = 8192;    # find(1) -size counts 512-byte blocks: 8192 => 4MB
my $max_files  = 100;     # how many of the largest files to report

# Case-insensitive extended regex; any `find -ls` line matching it is
# dropped from the report.
# NOTE(review): "ora?" also matches a bare "or", and the pattern is applied
# to the whole -ls line (owner, group, path), so e.g. "error.log" is
# excluded too. Kept as-is to preserve the original filter; tighten it if
# that is not what you want.
my $exclude = 'back|archiv|ora?';

# --- Identify ourselves -----------------------------------------------
chomp(my $host_name = `hostname`);    # $HOSTNAME is non-ubiquitous. Ugh.

# $USER is frequently unset under cron; fall back to the passwd entry of
# the real uid so the From: address never degenerates to "@host".
my $user = $ENV{"USER"};
$user = getpwuid($<) unless defined $user && length $user;
$user = 'unknown'    unless defined $user;
my $sender = $user . "\@" . $host_name;

chomp(my $date_stamp = `date`);

print "BloatDetector: Recipient is [$recipient]\n";
print "BloatDetector: Sender is [$sender]\n";
print "BloatDetector: Scanning files..This may take a while.\n";

# --- Collect the offenders --------------------------------------------
# Pipeline: list big files -> drop excluded lines -> sort by size (7th
# column of `find -ls`), largest first -> strip the first space-delimited
# field -> keep the top $max_files.
# "-k7" replaces the obsolete "+6" key syntax, which current sort(1)
# implementations treat as a file name.
my $find_cmd = qq{find / -depth -size +$min_blocks -ls}
             . qq{ | grep -i -v -E "$exclude"}
             . qq{ | sort -r -n -k7}
             . qq{ | cut -d" " -f2-}
             . qq{ | head -n$max_files};
my @bloated_files = `$find_cmd`;

# --- Build the report -------------------------------------------------
# The report is assembled in memory. The previous round trip through a
# fixed, appendable file in /tmp was a symlink hazard for a script that
# normally runs as root, and could leak stale output from an aborted run.
print "BloatDetector: Preparing report..\n";
chomp(my $end_time = `date`);
my $message = join '',
    "\n\n\nHere's the latest BloatDetector report from $host_name..\n\n",
    @bloated_files,    # each line still carries its trailing newline
    "\n\n",
    "Time invoked : $date_stamp\n",
    "Time completed : $end_time";

# --- Send it ----------------------------------------------------------
my %mail = (
    To      => $recipient,
    From    => $sender,
    Subject => "BloatDetector Results for " . $date_stamp . " from " . $sender,
    Message => $message,
);

print "BloatDetector: Sending report..\n";
# sendmail() returns false on failure and leaves the reason in
# $Mail::Sendmail::error; don't pretend the report went out.
sendmail(%mail)
    or die "BloatDetector: Can't send report: $Mail::Sendmail::error\n";
print "BloatDetector: Exiting..\n";