http://qs321.pair.com?node_id=953420

anshumangoyal has asked for the wisdom of the Perl Monks concerning the following question:

I have written a program to fork a process and then threads in that program. While this program is running on one machine it is giving Out of Memory on another machine and the other machine is much better when compare with the 1st machine. Here is my complete code:
#!/usr/bin/perl -w #-limit parameter is required when running a call without MVO. So make + sure MVO has been bypassed when this parameter is given. #Difference from LoadScript_05 -> New Calls are pumped only when Previ +uos calls are complte. This was not happening in 05. #Added support to calculate how many calls are rejected by CAC #Added Call Duration Parameter as manual input only as ffmpeg is not s +upported on some machines. #Added Support for capturing Netstat data in *netstat.log file. #Added Threads Support. Now for each call a process is not forked but +threads are created for each process. use strict; use Cwd; use Getopt::Long; use Parallel::ForkManager; use Time::Local; use Time::HiRes qw( usleep ); use File::Path; use File::stat; use Math::Round; use WWW::Curl::Easy; use threads; use threads::shared; my $timeOutAddition = 5; my $bandwidthCorrection = 3000; my $concurrentCalls; my $cps; my $total_calls; my $callsPerProcess; my $displayHelp; my $call = 1; my $Link; my $logName; my $limitBW; my $fileDur; my $proxy; my %CallInfoHash = (); my $fileWithDur; my %fileDurHash; my $totalFiles; GetOptions ( 'cat=i', \$concurrentCalls, 'cps=i', \$cps, 'tc=i', \$total_calls, 'cpp=i', \$callsPerProcess, 'log=s', \$logName, 'file=s', \$fileWithDur, 'proxy=s', \$proxy, 'limit=i', \$limitBW, 'help|?', \$displayHelp, ); sub Usage { print "Usage: perl RunLoad.pl [OPTIONS]\n"; print "Options:\n"; print " -cat Concurrent Calls not more than 1000 per load m +achine.\n"; print " -cps Calls per second not more than 5 Per Load Mach +ine.\n"; print " -tc Total calls.\n"; print " -cpp Calls Per Process. Should be multiple of CPS.\ +n"; print " -log Log Name for which Log file and directory will + be created.\n"; print " -file File Name having file name with exact File Dur +ation. When given -url and -dur flag will not be used.\n"; print " -proxy Proxy for Call - Optional Parameter.\n"; print " -limit Bandwidth Limit - Optional Parameter.\n"; exit -1; } if (defined $displayHelp) { Usage(); } if ( (!defined $concurrentCalls) || (!defined $cps) || (!defined $tota +l_calls) || (!defined $logName) || (!defined $callsPerProcess) ) { Usage(); } if (!defined $fileWithDur) { print "-file flag. Value not given\n"; Usage(); } if (! defined $limitBW) { $limitBW = 0; } if (-d $logName) { print "$logName exists, cleaning it's contents\n"; `rm -rf $logName`; mkdir ($logName); } else { mkdir ($logName); } my $LoadOperations = "/home/agoyal/Load_Scripts/LoadMachine.py"; my $resultsLog = "$logName/Result.log"; my $fileInfoCsv = "$logName/fileInfo.csv"; my $netstatLogs = "$logName/Netstat.csv"; my $logDir = $logName; my $RESULTS = OpenFileToWrite($resultsLog); my $NET_STATS = OpenFileToWrite($netstatLogs); my $count = 0; if (defined $fileWithDur) { my @allLines = ReadFile($fileWithDur); foreach (@allLines) { if ($_ =~ m/(\S+)\s+(\d+)\s+(\d+)/) { $fileDurHash{PadZeros($count)}{'file_name'} = $1; $fileDurHash{PadZeros($count)}{'file_dur'} = $2; $fileDurHash{PadZeros($count)}{'file_size'} = $3; $fileDurHash{PadZeros($count)}{'file_curl_limit'} = round( +$3/$2); $count++; } } } else { $fileDurHash{PadZeros($count)}{'file_name'} = $Link; $fileDurHash{PadZeros($count)}{'file_dur'} = $fileDur; } Print ("Results in : $resultsLog"); Print ("Files Info : $fileInfoCsv"); Print ("Netstat in : $netstatLogs"); Print ("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+"); my $NetStatPID = fork(); if (! $NetStatPID) { RunNetStat($NET_STATS); exit 0; } if ( $limitBW == 1 ) { Print ("Bandwidth Limit Flag Set!"); } if (defined $proxy) { Print ("Proxy : $proxy"); } if (! defined $fileWithDur) { Print ("Concurrent Calls = $concurrentCalls, CPS = $cps, Total Cal +ls = $total_calls, Calls Per Process = $callsPerProcess, Link = $Link +"); } else { Print ("Concurrent Calls = $concurrentCalls, CPS = $cps, Total Cal +ls = $total_calls, Calls Per Process = $callsPerProcess"); } Print ("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+"); #Forcing Print to flush when ever a print command is given #Needed specially forking the processes. $| = 1; my $htmlHeaderFile; my $limitRate; my $CurrentCall : shared = 1; my $waitCallNumber = 0; my $fractionSecondSleep = (1/$cps) * 1000000; my $Fork_ConcurrentCalls = $concurrentCalls/$callsPerProcess; my $Fork_TotalCalls = $total_calls/$concurrentCalls; my $Fork_Total = $Fork_ConcurrentCalls * $Fork_TotalCalls; my $sTime = `date +%s`; chomp($sTime); Print ("Start Time: $sTime"); #Creating Database File, to which every process is going to write it's + values. `rm -rf /tmp/output.out`; my $outFile = "/tmp/output.out"; my $outFilePtr; if (! open ($outFilePtr, '>>', $outFile)) { die "Cannot Open File $outFile. Reason $!\n"; } Print ("Total Processes Forked for Concurrent Calls = $Fork_Concurrent +Calls"); Print ("Total Processes Forked for Complete Load = $Fork_Total"); my $pm = Parallel::ForkManager->new($Fork_ConcurrentCalls); $pm->set_max_procs($Fork_ConcurrentCalls); my @threadsID; $pm->run_on_start ( sub { my ($pid, $ident) = @_; @threadsID = (); my $threadNum = 1; for (1..$callsPerProcess) { my $uniqueNum = PadZeros($CurrentCall); my @array; push (@array, $threadNum); push (@array, $pid); push (@array, $uniqueNum); $threadNum++; #Thread is Created Here and it calls threadSub Routine my $thread = threads->create(\&threadSub, @array); lock($CurrentCall); push(@threadsID, $thread); $CurrentCall++; $waitCallNumber++; #Sleeping in between a thread for some fraction #so that calls are not sent to MVO in a shot. usleep($fractionSecondSleep); } if ($waitCallNumber >= $concurrentCalls) { sleep(1); if ($CurrentCall <= $total_calls) { print "Waiting for $concurrentCalls calls to finish\n" +; } else { print "Waiting for all calls to finish\n"; } my @threadsRunning = threads->list(); foreach (@threadsRunning) { $_->join(); } $waitCallNumber = 0; } else { foreach (@threadsID) { $_->detach(); } } } ); #Total Set to Fork = 2 (500 * 2) for (1..$Fork_TotalCalls) { for (1..$Fork_ConcurrentCalls) { my $pid = $pm->start() and next; $pm->finish($?>>8); } } print "Waiting for All Children\n"; $pm->wait_all_children; print "All Finished\n"; sub threadSub { my ($threadNum, $processNum, $uniqueNum) = @_; my $random = int(rand(keys %fileDurHash)); $random = PadZeros($random); $Link = $fileDurHash{$random}{'file_name'}; $fileDur = $fileDurHash{$random}{'file_dur'} + $timeOutAddition; $limitRate = $fileDurHash{$random}{'file_curl_limit'} + $bandwidth +Correction; my $timeStamp = ` date | awk '{ print \$4 }' `; chomp($timeStamp); if ( $limitBW == 1 ) { printf "Call %-5s of %-5s at %-8s || File: %s || Timeout: %s | +| Thread: %-2s || Process: %-5s || LimitBW: %-6s bytes/s\n", $uniqueN +um, $total_calls, $timeStamp, $Link, $fileDur, $threadNum, $processNu +m, $limitRate; } else { printf "Call %-5s of %-5s at %-8s || File: %s || Timeout: %s | +| Thread: %-2s || Process: %-5s\n", $uniqueNum, $total_calls, $timeSt +amp, $Link, $fileDur, $threadNum, $processNum; } $htmlHeaderFile = $logName."/header_html_".$uniqueNum; my $FILE_PTR = OpenFileToWrite('/dev/null'); my $HTML_PTR = OpenFileToWrite($htmlHeaderFile); my $curl = WWW::Curl::Easy->new(); $curl->setopt(CURLOPT_TIMEOUT, $fileDur); $curl->setopt(CURLOPT_HEADER,1); $curl->setopt(CURLOPT_URL, $Link); $curl->setopt(CURLOPT_MAXREDIRS, 3); $curl->setopt(CURLOPT_FOLLOWLOCATION, 1); $curl->setopt(CURLOPT_WRITEHEADER, \$HTML_PTR); $curl->setopt(CURLOPT_FILE, \$FILE_PTR); if ( $limitBW == 1) { $curl->setopt(CURLOPT_MAX_RECV_SPEED_LARGE, $limitRate); } my $retcode = $curl->perform; my $redir_count = $curl->getinfo(CURLINFO_REDIRECT_COUNT); my $down_speed = $curl->getinfo(CURLINFO_SPEED_DOWNLOAD); my $time_taken = $curl->getinfo(CURLINFO_TOTAL_TIME); my $connection_time = $curl->getinfo(CURLINFO_CONNECT_TIME); my $start_time = $curl->getinfo(CURLINFO_STARTTRANSFER_TIME); my $down_size = $curl->getinfo(CURLINFO_SIZE_DOWNLOAD); $time_taken = sprintf("%.2f", $time_taken); $connection_time = sprintf("%.2f", $connection_time); $start_time = sprintf("%.2f", $start_time); close ($FILE_PTR); close ($HTML_PTR); print $outFilePtr "$uniqueNum, $Link, $htmlHeaderFile, $retcode, $ +redir_count, $down_speed, $time_taken, $connection_time, $start_time, + $down_size\n"; } close ($outFilePtr); my @outFile = ReadFile($outFile); foreach (@outFile) { my $currLine = $_; my @splitMe = split(/, /, $currLine); $CallInfoHash{$splitMe[0]}{'curl_file_processed'} = $splitMe[1]; $CallInfoHash{$splitMe[0]}{'curl_header_file'} = $splitMe[2]; $CallInfoHash{$splitMe[0]}{'curl_return_code'} = $splitMe[3]; $CallInfoHash{$splitMe[0]}{'curl_redirect_count'} = $splitMe[4]; $CallInfoHash{$splitMe[0]}{'curl_download_speed'} = $splitMe[5]; $CallInfoHash{$splitMe[0]}{'curl_time_taken'} = $splitMe[6]; $CallInfoHash{$splitMe[0]}{'curl_connection_time'} = $splitMe[7]; $CallInfoHash{$splitMe[0]}{'curl_start_time'} = $splitMe[8]; chomp($splitMe[9]); $CallInfoHash{$splitMe[0]}{'curl_download_size'} = $splitMe[9]; } #Deleting output.dat file which is database file written by each proce +ss. #`rm -rf /tmp/output.out`; my $eTime = `date +%s`; chomp($eTime); Print ("End Time: $eTime"); my $timeTaken = $eTime - $sTime; Print ("Time Taken: $timeTaken"); #Killing Netstat Process. `kill -9 $NetStatPID`; FindCacRejections(); ProcessReport(); PrintCallInfo(); close ($RESULTS); close ($NET_STATS); #*********************************** Load Calls Process Ends ********* +*********************# #Padding Zeros to make each call Unique sub PadZeros { my $num = $_[0]; my $paddedNum = 0; if (($num >= 0) && ($num <= 9)){ $paddedNum = "0000000$num"; } elsif (($num > 9) && ($num <= 99)){ $paddedNum = "000000$num"; } elsif (($num > 99) && ($num <= 999)){ $paddedNum = "00000$num"; } elsif (($num > 999) && ($num <= 9999)){ $paddedNum = "0000$num"; } elsif (($num > 9999) && ($num <= 99999)){ $paddedNum = "000$num"; } elsif (($num > 99999) && ($num <= 999999)){ $paddedNum = "00$num"; } elsif (($num > 999999) && ($num <= 9999999)){ $paddedNum = "0$num"; } else { $paddedNum = $num; } return $paddedNum; } #Running Netstats to find number of ESTABLISHED Connections at a time. sub RunNetStat { my $logsFile = $_[0]; my $interval = 5; print $logsFile "TimeStamp, Established, Cores\n"; while(1) { my $established = 0; my @netStatOutput = `netstat -nt 2>&1`; my $timeStamp = ` date | awk '{ print \$4 }' `; chomp($timeStamp); foreach (@netStatOutput) { if ($_ =~ m/(5050|5051)\s+established/gi) { $established ++; } } print $logsFile "$timeStamp, $established, 0\n"; sleep($interval); } } #Opens File to Write and returns File Pointer sub OpenFileToWrite { my $fileName = $_[0]; my $filePTR; if ( ! (open ($filePTR, '>', $fileName))) { die "Cannot Open $fileName. Reason $!\n"; } else { return $filePTR; } } #Reads file and retun array with the content sub ReadFile { my $fileName = $_[0]; my @returnArray = (); if (!open (FILE, '<', $fileName)) { die "Cannot open File $fileName, Reason: $!\n"; } @returnArray = <FILE>; close (FILE); return @returnArray; } #Prints on Screen as well as in file with New Line. sub Print { my $toPrint = $_[0]; chomp($toPrint); print "$toPrint\n"; print $RESULTS "$toPrint\n"; } #Prints Call info in CSV Format. sub PrintCallInfo { my $LOAD_INFO = OpenFileToWrite($fileInfoCsv); print $LOAD_INFO "Call-Num, File-Name, Curl-Time-To-Start, Curl-Ti +me-Taken, Curl-Return-Code, Cac-Triggered, HTML-Header, Content-Lengt +h, Cache-Served, Download-Rate, Curl-Connection-Time\n"; foreach my $keys (sort keys %CallInfoHash) { my $file_name = $CallInfoHash{$keys}{'curl_file_processed'}; my $start_time = $CallInfoHash{$keys}{'curl_start_time'}; my $time_taken = $CallInfoHash{$keys}{'curl_time_taken'}; my $exit_code = $CallInfoHash{$keys}{'curl_return_code'}; my $cac_triggered = $CallInfoHash{$keys}{'cac_triggered'}; my $html_header = $CallInfoHash{$keys}{'curl_header_file'}; my $content_length = $CallInfoHash{$keys}{'curl_download_size' +}; my $cache_served = $CallInfoHash{$keys}{'curl_redirect_count'} +; my $download_speed = $CallInfoHash{$keys}{'curl_download_speed +'}; my $curl_connection_time = $CallInfoHash{$keys}{'curl_connecti +on_time'}; print $LOAD_INFO "$keys, $file_name, $start_time, $time_taken, + $exit_code, $cac_triggered, $html_header, $content_length, $cache_se +rved, $download_speed, $curl_connection_time\n"; } close ($LOAD_INFO); } #Processes Report to find number of Rejections. sub ProcessReport { my $TotalProcessesForked = keys %CallInfoHash; my $Curl_ErrorCode_07 = 0; my $Curl_ErrorCode_18 = 0; my $Curl_ErrorCode_28 = 0; my $Curl_ErrorCode_52 = 0; my $ErrorCode_Cac = 0; my $failed_Calls = 0; my $passed_Calls = 0; my $Cache_Served = 0; foreach my $keys (sort keys %CallInfoHash) { if ($CallInfoHash{$keys}{'cac_triggered'} == 1) { $ErrorCode_Cac++; $failed_Calls++; } else { if ($CallInfoHash{$keys}{'curl_return_code'} =~m /0/i) { $passed_Calls++; } elsif ($CallInfoHash{$keys}{'curl_return_code'} =~ m/7/) + { $Curl_ErrorCode_07++; $failed_Calls++; } elsif ($CallInfoHash{$keys}{'curl_return_code'} =~ m/18/ +) { $Curl_ErrorCode_18++; $failed_Calls++; } elsif ($CallInfoHash{$keys}{'curl_return_code'} =~ m/28/ +) { $Curl_ErrorCode_28++; $failed_Calls++; } elsif ($CallInfoHash{$keys}{'curl_return_code'} =~ m/52/ +) { $Curl_ErrorCode_52++; $failed_Calls++; } } if ($CallInfoHash{$keys}{'curl_redirect_count'} > 0) { $Cache_Served++; } } Print ("-------- Load Results -----------"); Print ("Total Processes Forked = $TotalProcessesForked"); Print ("Total Processes Failed = $failed_Calls"); Print ("Total Processes Passed = $passed_Calls"); Print ("Total Processes Cac'ed = $ErrorCode_Cac"); Print ("Total Cache Served = $Cache_Served"); Print ("-------- Load Errors -----------"); Print ("Total Error Code 7 = $Curl_ErrorCode_07"); Print ("Total Error Code 18 = $Curl_ErrorCode_18"); Print ("Total Error Code 28 = $Curl_ErrorCode_28"); Print ("Total Error Code 52 = $Curl_ErrorCode_52"); } #Find from header file how many calls were rejected by CAC. sub FindCacRejections { foreach my $keys (sort keys %CallInfoHash) { my $currHeaderFile = $CallInfoHash{$keys}{'curl_header_file'}; $CallInfoHash{$keys}{'cac_triggered'} = 0; if ($currHeaderFile =~ m/header_html/i) { my @fileContent = ReadFile($currHeaderFile); foreach (@fileContent) { if ($_ =~ m/500\s+INKApi\s+Error/i) { $CallInfoHash{$keys}{'cac_triggered'} = 1; last; } } } else { Print "$keys has no header File\n"; } } }