#!/opt/perl5/bin/perl -w
##
## Split a file into 'num' equal-size chunks, or into chunks with
## at most 'num' lines each.
##
## Usage:
## chunkify <-n num | -l num> [-v] [-o <output name>]
## [-f <footer>] [-h <header>] <input file>
##
#
use strict;
use Getopt::Std;
##
## File-scope state shared by the option handling below, the main loop
## and write_chunk().
##
my ($verbose, $outfront, $outback, $froot, $chunks, $lines);
my ($infile, $headfile, @headlines, $footfile, @footlines);
my ($rem,@line,$fname,$x,$y,$opts);
my (%Options);
##
## Parse the switches first.  getopts() strips them (and their values)
## from @ARGV, so anything left over afterwards is a file name.
##
$opts   = getopts('vo:h:f:n:l:', \%Options);
$lines  = 0;
$chunks = 0;
if (!$opts) {
    ## A switch that is present but has no value shows up as a key whose
    ## value is undef; report each such switch before the usage text.
    my %complaint_for = (
        n => "-n requires a numeric argument if used.",
        l => "-l requires a numeric argument if used.",
        o => "-o requires a filename if used.",
        h => "-h requires a filename if used.",
        f => "-f requires a filename if used.",
    );
    for my $switch (qw(n l o h f)) {
        next unless exists $Options{$switch} && !defined $Options{$switch};
        print "\nERROR: $complaint_for{$switch}\n";
    }
    ShowUsage();
    ## Stop here: nothing below can work without valid options.
    exit(1);
}
if (defined $Options{l} && defined $Options{n}) {
    print "\nERROR: Cannot use -l and -n simultaneously.\n";
    exit(1);
}
if (!defined $Options{l} && !defined $Options{n}) {
    print "\nERROR: Either -n or -l must be specified.\n";
    exit(1);
}
##
## Reject zero, negative and non-numeric counts up front, so that a bad
## value is reported here rather than surfacing later as an arithmetic
## error.
##
for my $switch (qw(n l)) {
    next unless defined $Options{$switch};
    next if $Options{$switch} =~ /\A\d+\z/ && $Options{$switch} > 0;
    print "\nERROR: -$switch requires a positive whole number.\n";
    exit(1);
}
$chunks   = $Options{n} if defined $Options{n};
$lines    = $Options{l} if defined $Options{l};
$verbose  = $Options{v} if $Options{v};
$headfile = $Options{h} if defined $Options{h} && length $Options{h};
$footfile = $Options{f} if defined $Options{f} && length $Options{f};
##
## The input file is whatever remains once the switches are gone.  As
## before, the last remaining argument is the one that is used.
##
if (!@ARGV) {
    print "\nERROR: No input file specified.\n";
    ShowUsage();
    exit(1);
}
$infile = $ARGV[-1];
##
## The output name is based on the input file unless the -o option
## overrides it.
##
$froot = (defined $Options{o} && length $Options{o}) ? $Options{o} : $infile;
$x=0;
$y=1;
## A '%' in the output name marks where the sequence number goes:
## $outfront is the text before it, $outback (if any) the text after it.
($outfront,$outback) = split (/%/,$froot);
## "-o %" (or a name starting with '%' and nothing else) yields no front part.
$outfront = '' unless defined $outfront;
##
## If header and/or footer files were specified, read their contents
## into @headlines / @footlines so they can be copied into every
## output file.
##
if (defined $headfile) {
    ## Three-argument open: the file name is never parsed for a mode.
    open(my $head_fh, '<', $headfile)
        or die "Could not open header file $headfile: $!";
    @headlines = <$head_fh>;
    close($head_fh);
}
if (defined $footfile) {
    open(my $foot_fh, '<', $footfile)
        or die "Could not open footer file $footfile: $!";
    @footlines = <$foot_fh>;
    close($foot_fh);
}
## INFILE stays a bareword handle because the rest of the script reads
## from it by that name.
open(INFILE, '<', $infile) or die "Could not open file $infile: $!";
##
## Either we were given the "lines" option, or we were given the
## "chunks" option.  If we got chunks, we need to figure out how many
## lines that's going to be, and the only way is to read through the
## file, counting lines.
##
if ($chunks != 0) {
    $x = 0;
    while (<INFILE>) {
        $x++;
    }
    ## Base size of every chunk (integer division).
    $lines = int($x / $chunks);
    @line[1 .. $chunks] = ($lines) x $chunks;
    ## Did it divide out evenly? (eg: 141 lines into 13 "equal" files
    ## ain't evenly)  If not, the first $rem files get one extra line.
    $rem = $x - ($chunks * $lines);
    foreach my $idx (1 .. $rem) {
        $line[$idx]++;
    }
    print "Input file has $x lines; will put at least $lines lines in each output file.\n"
        if $verbose;
    ## 'or' (not '||') so that the check applies to close()/open() itself.
    close(INFILE) or die "ERROR: Could not close $infile: $!";
    ## Re-open to start reading from the first line again.
    open(INFILE, '<', $infile) or die "Could not open file $infile: $!";
}
##
## Main loop: one pass per output file until the input is exhausted.
## $y is the running sequence number; it is formatted as three digits
## and placed between $outfront and $outback (when there is one).
##
$y = 0;
while (!eof(INFILE)) {
    $y++;
    $fname = $outfront . sprintf("%0.3d", $y);
    $fname .= $outback if defined $outback;
    ## In -n mode each file has its own precomputed size in @line;
    ## otherwise $lines keeps the value it already has.
    if (defined $line[$y]) {
        $lines = $line[$y];
    }
    if ($verbose) {
        print "Creating $fname with $lines lines\n";
    }
    write_chunk();
}
close(INFILE);
##
## write_chunk - write one output file.
##
## Takes no arguments; works on file-scope state:
##   $fname      name of the file to create (truncated if it exists)
##   $lines      number of input lines to copy into it
##   INFILE      already-open input handle; reading continues from its
##               current position
##   @headlines  written before the copied lines, when non-empty
##   @footlines  written after the copied lines, when non-empty
##
## Fewer than $lines lines are copied if INFILE runs out first.
## Dies on an invalid $lines and on any open/write/close failure.
## Returns 1 on success.
##
sub write_chunk {
    ## Checked before the file is created so a bad size leaves nothing behind.
    die "ERROR: lines per chunk must be a positive whole number\n"
        unless defined $lines && $lines =~ /\A\d+\z/ && $lines > 0;

    open(my $out, '>', $fname)
        or die "ERROR Could not open output file $fname: $!";

    ## NB: 'or', not '||'.  With '||' the array would be evaluated in
    ## scalar context and its element count printed instead of its text.
    if (@headlines) {
        print {$out} @headlines
            or die "ERROR writing header to $fname: $!";
    }

    my $written = 0;
    while (defined(my $row = <INFILE>)) {
        print {$out} $row
            or die "ERROR writing to output file $fname: $!";
        last if ++$written >= $lines;
    }

    if (@footlines) {
        print {$out} @footlines
            or die "ERROR writing footer to $fname: $!";
    }

    ## Buffered write errors surface at close, so this must be checked.
    close($out) or die "ERROR closing $fname: $!";
    return 1;
}
##
## ShowUsage - print the command-line help text to STDOUT.
##
## Takes no arguments and does not exit; the caller decides what to do
## after the text has been shown.
##
sub ShowUsage {
    ## Single-quoted heredoc: the text is emitted exactly as written.
    print <<'EOD';
Usage: chunkify.pl <-n num | -l num> [-o <output filename>] [-v]
       [-h <header text file>] [-f <footer text file>] <input file name>
Required parameters (specify only one):
  -n : split input file into 'num' (more-or-less) equal-size files.
  -l : split input file into files with 'num' lines.
  In most cases, not all output files will have the same number of lines. If -l is used,
  each file will have that many lines, except for the last file, which may have fewer.
  If -n is used, each file will have X or X+1 lines, where X is calculated based on
  number of output files required and size of the input file.
Optional parameters:
  -v : verbose output
  -o : output file name. Use % sign to indicate position of sequence number. If % not
       specified, sequence number will be appended.
       Examples:
         file%.txt: will generate file001.txt, file002.txt, file003.txt ...
         file.txt: will generate file.txt001, file.txt002, file.txt003 ...
         %file: will generate 001file, 002file, 003file ...
       If -o is not specified, output filename will be built from input file name, with
       sequence number appended.
  -h : file containing text to be prepended to each created output file.
  -f : file containing text to be appended to each created output file.
EOD
    return;
}