=head1 NAME
ParseCSV
=head1 SYNOPSIS
# This sample script will transpose the first and last column
# of one CSV file and write it to another
use Spreadsheet::ParseCSV;
use Spreadsheet::WriteCSV;
my $parser = ParseCSV->new("infile.csv"); # initialize the parser
my $writer = WriteCSV->new("outfile.csv"); # initialize the writer
while (my $row = $parser->parse_row()) { # read the next row
my $firstcol = $row->[0];
$row->[0] = $row->[$#{$row}]; # swap first and last
$row->[$#{$row}] = $firstcol;
$writer->write_row($row); # write the next row
}
$parser->close(); # close the parser
$writer->close(); # close the writer
=head1 DESCRIPTION
Object-oriented CSV (comma-separated) file parser and writer.
=head2 Definition of a CSV file
1) Data columns are separated by commas.
2) Rows are separated by line breaks (see rule 5 also).
3) Data columns that contain commas are wrapped in double quotes.
4) Data columns that contain double quotes are wrapped in double quotes
and the quotes within the data are doubled (i.e., "The ""Big"" Bomb" = The "Big" Bomb).
5) Data columns that contain line breaks are wrapped in double quotes.
=head2 ParseCSV METHOD SUMMARY
=over
=item new()
Object constructor that takes the input file path as its argument, as
shown in the SYNOPSIS. Returns a reference to a new parser object on success
or returns 0 if the file is unavailable or unreadable.
=item parse_row()
Reads the next row of data from the file and returns an array reference
with the columns of data. Returns a false value if there is no next row.
=item close()
Closes the input file.
=back
=head1 AUTHOR
Joshua Gitlin joshua.gitlin@gmail.com
=cut
# Revision history:
# Date Author Remarks
# 21-Sep-2004 Joshua Gitlin Initial version
package ParseCSV;

# Catch undeclared variables and suspicious constructs at compile time.
use strict;
use warnings;

# Module version, visible to callers as $ParseCSV::VERSION.
# Declared with "our" instead of the obsolete "use vars" pragma.
our $VERSION = "1.00";
# public constructor
# Opens a CSV file for reading and returns a new parser object.
#
# Accepted call forms:
#   ParseCSV->new($path)           - positional path
#   ParseCSV->new(file => $path)   - named form with the single key "file"
#
# Returns the blessed parser on success, or 0 when no path is given or
# the file cannot be opened for reading.
sub new {
    my ($class, @args) = @_;

    # Exactly two arguments whose first is the literal key "file" select
    # the named form; anything else treats the first argument as the path.
    my $filename =
        (@args == 2 && defined $args[0] && $args[0] eq 'file')
        ? $args[1]
        : $args[0];

    return 0 unless defined $filename && length $filename;

    # Three-argument open: the path can never be read as an open mode.
    # The lexical handle belongs to this object alone, so several parsers
    # can be active at once. "or" (not "||") applies to the result of
    # open itself, so a failed open is actually reported.
    open(my $csv, '<', $filename) or return 0;

    return bless { _csv => $csv }, $class;
}
# public method
# Reads the next row of the CSV file and returns its column values as
# an array reference. One row may span several physical lines when a
# quoted column contains line breaks.
#
# Returns an empty value (undef in scalar context) at end of file, when
# the file ends inside an unterminated quoted column, or when a closing
# quote is followed by something other than a quote, a comma or the end
# of the line. The last case also emits a warning on STDERR.
sub parse_row {
    my $self = shift;
    my $csv  = $self->{_csv};

    my @cols;
    my $col              = "";
    my $in_quoted_string = 0;

    while (1) {
        my $data = <$csv>;

        # Test definedness rather than truth: a final line consisting of
        # "0" with no trailing newline is data, not end of file.
        return unless defined $data;

        $data =~ s/\r\n$/\n/;    # dos2unix
        chomp $data;

        # Reverse the line so that chop() consumes it front to back.
        $data = reverse $data;

        # Outside a quoted column, a line without any quotes can simply
        # be split on commas.
        return \@cols
            if !$in_quoted_string && shortcut(\$data, \@cols);

        while (length $data) {
            my $next_char = chop $data;

            if ($next_char eq '"' && !$in_quoted_string) {
                # beginning of a quoted cell
                $in_quoted_string = 1;
            }
            elsif ($next_char eq '"') {
                # A quote inside a quoted cell: what follows decides
                # whether it is an escaped quote or the closing quote.
                if (!length $data) {
                    # closing quote at end of line: the row is complete
                    push @cols, $col;
                    return \@cols;
                }

                my $next_next = chop $data;
                if ($next_next eq '"') {
                    # back-to-back quotes stand for one literal quote
                    $col .= '"';
                }
                elsif ($next_next eq ',') {
                    # closing quote followed by a separator
                    $in_quoted_string = 0;
                    push @cols, $col;
                    $col = "";

                    if (!length $data) {
                        # The separator was the last character, so the
                        # row ends with one more, empty, column. split()
                        # on an empty string would return no columns.
                        push @cols, "";
                        return \@cols;
                    }
                    return \@cols if shortcut(\$data, \@cols);
                }
                else {
                    # This shouldn't occur in a well-formed CSV
                    warn "quote string error [$next_next]\n";
                    return;
                }
            }
            elsif ($next_char eq ',' && !$in_quoted_string) {
                # end of a non-quoted cell
                push @cols, $col;
                $col = "";
                return \@cols if shortcut(\$data, \@cols);
            }
            else {
                # just another character
                $col .= $next_char;
            }
        }

        if (!$in_quoted_string) {
            # end of line, end of row
            push @cols, $col;
            return \@cols;
        }

        # End of line reached while still waiting for a closing quote:
        # keep the line break and continue with the next physical line.
        $col .= "\n";
    }
}
# public method
# Closes the parser's CSV filehandle. Calling it is optional.
# Returns whatever the built-in close returns: true on success,
# false on failure.
sub close {
    my ($self) = @_;

    # CORE:: spells out that this is the built-in, not this method.
    return CORE::close($self->{_csv});
}
# private method
# Fast path for the parser: when the rest of the line holds no double
# quotes, none of the special cases (rules 3-5) can apply and the rest
# can simply be split on commas.
#
# Arguments:
#   $dataref - reference to the unparsed rest of the line, stored
#              reversed (it is reversed back before splitting)
#   $colsref - reference to the array that collects the row's columns
#
# Returns 1 after appending the remaining columns to @$colsref, or 0,
# leaving both arguments untouched, when a double quote is present.
sub shortcut {
    my ($dataref, $colsref) = @_;

    # index() is stateless. A scalar-context m//g match would resume
    # from pos() left by an earlier call on the same string and could
    # miss a quote it has already stepped over.
    return 0 if index($$dataref, '"') >= 0;

    # Limit -1 keeps trailing empty columns.
    push @{$colsref}, split(/,/, scalar reverse($$dataref), -1);
    return 1;
}
1;
__END__
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.