comment on

=head1 NAME
 
ParseCSV 

=head1 SYNOPSIS

 # This sample script will transpose the first and last column
 # of one CSV file and write it to another

 use Spreadsheet::ParseCSV;
 use Spreadsheet::WriteCSV;
 my $parser = ParseCSV->new("infile.csv");    # initialize the parser
 my $writer = WriteCSV->new("outfile.csv");    # initialize the writer

 while (my $row = $parser->parse_row()) {        # read the next row
   my $firstcol = $row->[0];
   $row->[0] = $row->[$#{$row}];            # swap first and last
   $row->[$#{$row}] = $firstcol;
   $writer->write_row($row);                # write the next row
 }
 $parser->close();                    # close the parser
 $writer->close();                    # close the writer

=head1 DESCRIPTION

 Object-oriented CSV (comma-separated) file parser and writer.

=head2 Definition of a CSV file

 1) Data columns are separated by commas.
 2) Rows are separated by line breaks (see rule 5 also).
 3) Data columns that contain commas are wrapped in double quotes.
 4) Data columns that contain double quotes are wrapped in double quotes
    and the quotes within the data are doubled
    (i.e., "The ""Big"" Bomb" = The "Big" Bomb).
 5) Data columns that contain line breaks are wrapped in double quotes.

=head2 ParseCSV METHOD SUMMARY

=over

=item new()

Object constructor that takes one argument: the path of the input file
(for example, C<< ParseCSV->new("infile.csv") >>). Returns a reference to
a new parser object on success
or returns 0 if the file is unavailable or unreadable.

=item parse_row()

Reads the next row of data from the file and returns an array reference
holding the columns of data. Returns a false value (undef in scalar
context) if there is no next row.

=item close()

Closes the input file.

=back

=head1 AUTHOR

Joshua Gitlin joshua.gitlin@gmail.com

=cut

# Revision history:
# Date        Author        Remarks
# 21-Sep-2004    Joshua Gitlin    Initial version

package ParseCSV;

# Strictures catch undeclared variables and suspicious constructs at
# compile time instead of letting them fail silently at run time.
use strict;
use warnings;

# Package version, readable as $ParseCSV::VERSION ("our" replaces the
# obsolete "use vars" pragma and declares the same package variable).
our $VERSION = "1.00";

# public constructor
# Takes the path of the CSV file to parse and opens it for reading.
# The path is used literally (three-argument open), so characters such
# as "<", ">" or "|" in the name are never interpreted as an open mode.
# Every parser owns its own lexical filehandle, so several parsers can
# be open at the same time without disturbing each other.
# Returns the new parser object, or 0 if the file cannot be opened
# (missing, unreadable, or no path given).
sub new {
    my ($class, $filename) = @_;

    # no usable path: nothing to open
    return 0 unless defined $filename && length $filename;

    # "or" (not "||") so the failure check applies to open() itself
    open(my $csv, '<', $filename) or return 0;

    return bless {
        _csv => $csv,
    }, $class;
}

# public method
# Reads the next row of the CSV file and returns its column values as
# an array reference. One row may span several physical lines when a
# quoted column contains line breaks.
# Returns nothing (undef in scalar context) when:
#   - the end of the file has been reached,
#   - the file ends inside an unterminated quoted column, or
#   - a closing quote is followed by anything other than another quote,
#     a comma, or the end of the line (an error line is printed first).
sub parse_row {
    my $self = shift;
    my $csv = $self->{_csv};

    my @cols = ();
    my $col = "";

    # true while we are between an opening and a closing double quote
    my $in_quoted_string = 0;

    while (1) {
        # Read the next physical line. Only a genuine end of file stops
        # us: a final line holding just "0" is false but is still data.
        my $data = <$csv>;
        return unless defined $data;
        $data =~ s/\r\n$/\n/; # dos2unix
        chomp $data;

        # An empty line inside a quoted column is an embedded line break.
        if ($in_quoted_string && $data eq '') {
            $col .= "\n";
            next;
        }

        # Reverse the string so we can chop it one char at a time
        $data = reverse $data;

        # Fast path: a fresh line without any quotes is a plain split.
        return \@cols if (!$in_quoted_string && shortcut(\$data, \@cols));

        while (length($data) > 0) {
            my $next_char = chop $data;
            if ($next_char eq '"' && !$in_quoted_string) {
                # beginning of a quoted cell
                $in_quoted_string = 1;
            } elsif ($next_char eq '"') {
                # A quote inside a quoted cell: the following char
                # decides what it means ('' when the line is used up).
                my $next_next = length($data) ? chop $data : '';
                if ($next_next eq '"') {
                    # back-to-back quotes mean a literal quote
                    $col .= '"';
                } elsif ($next_next eq '') {
                    # closing quote was the last char on the line,
                    # so both the cell and the row are complete
                    push @cols, $col;
                    return \@cols;
                } elsif ($next_next eq ',') {
                    # closing quote followed by the column separator
                    $in_quoted_string = 0;
                    push @cols, $col;
                    if ($data eq '') {
                        # The separator was the last char on the line:
                        # the row ends with one more, empty, column.
                        # (shortcut() adds nothing for empty input.)
                        push @cols, '';
                        return \@cols;
                    }
                    return \@cols if (shortcut(\$data, \@cols));
                    $col = "";
                } else {
                    # This shouldn't occur in a well-formed CSV
                    print "quote string error [$next_next]\n";
                    return;
                }
            } elsif ($next_char eq ',' && !$in_quoted_string) {
                # end of a non-quoted cell
                push @cols, $col;
                return \@cols if (shortcut(\$data, \@cols));
                $col = "";
            } else {
                # just another character
                $col .= $next_char;
            }
        }

        if (!$in_quoted_string) {
            # end of line outside quotes: end of row
            push @cols, $col;
            return \@cols;
        }

        # End of line reached while still waiting for a closing quote:
        # keep the line break and continue with the next line.
        $col .= "\n";
    }
}

# public method
# Optional call that releases the CSV filehandle held by this parser.
# Hands back whatever the built-in close reports: true when the handle
# was closed, false otherwise.
sub close {
    my ($self) = @_;
    # CORE:: makes explicit that this is the built-in, not this method
    return CORE::close($self->{_csv});
}

# private method
# Speeds up parsing when the rest of a line holds no special columns
# (see rules 3-5), i.e. when it contains no double quote at all.
#   $dataref - reference to the remaining line text, stored REVERSED
#   $colsref - reference to the array of columns collected so far
# If the text contains no double quote, it is reversed back, split on
# commas (keeping trailing empty columns), appended to @$colsref, and
# 1 is returned. Otherwise nothing is changed and 0 is returned.
# Empty text appends no column, because split() yields an empty list
# for an empty string.
sub shortcut {
    my ($dataref, $colsref) = @_;

    # index() is a stateless check; a /g match here would leave pos()
    # set on the caller's string and skew any later call.
    return 0 if index($$dataref, '"') >= 0;

    push @{$colsref}, split(/,/, scalar reverse($$dataref), -1);
    return 1;
}

1;
[download]

In reply to ParseCSV by spacepony

Are you posting in the right place? Check out Where do I post X? to know for sure.
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
Want more info? How to link or How to display code and escape characters are good places to start.


Syntactic Confectionery Delight
	PerlMonks