#use Text::CSV::R qw(:all); use Text::CSV::R qw(read_table colnames rownames); my $M = read_table($filename, \%options); print join(q{,}, colnames($M)); print join(q{,}, rownames($M)); print $M->[0][0]; for my $row (@{$M}) { for my $col (@{$row}) { # do someting with $col } } #### package Text::CSV::R; require 5.005; use strict; use warnings; require Exporter; use Text::CSV; use Text::CSV::R::Matrix; use Carp; use Scalar::Util qw(reftype); our @ISA = qw(Exporter); our %EXPORT_TAGS = ( 'all' => [ qw( read_csv read_csv2 read_table read_delim rownames colnames ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our $VERSION = '0.01'; our $DEFAULT_OPTS = { header => undef, skip => 0, nrow => -1, sep_char => q{,}, quote_char => q{"}, allow_whitespace => 0, binary => 1, blank_lines_skip => 1, }; # A mapping of the R options to the Text:CSV options. If there is no # Text::CSV equivalent, the same option name is used (R options are # not passed to Text::CSV). our $R_OPT_MAP = { sep => 'sep_char', quote => 'quote_char', skip => 'skip', nrow => 'nrow', header => 'header', encoding => 'encoding', row_names => 'row_names', strip_white => 'allow_whitespace', blank_lines_skip => 'blank_lines_skip', }; sub colnames { my ( $tied_ref, $values ) = @_; my $tied_obj = tied @{$tied_ref}; if ( defined $values ) { $tied_obj->{colnames} = $values; } return $tied_obj->{colnames}; } sub rownames { my ( $tied_ref, $values ) = @_; my $tied_obj = tied @{$tied_ref}; if ( defined $values && reftype $values eq 'ARRAY' ) { if (scalar @{$values} != scalar @{$tied_obj->{ARRAY}}) { croak 'Invalid rownames length'; } $tied_obj->{rownames} = $values; } return $tied_obj->{rownames}; } # merge the global default options, function defaults and user options sub _merge_options { my ( $t_opt, $u_opt ) = @_; my %ret = %{$DEFAULT_OPTS}; @ret{ keys %{$t_opt} } = values %{$t_opt}; @ret{ keys %{$u_opt} } = values %{$u_opt}; for my $k ( keys %{$R_OPT_MAP} ) { if ( defined $ret{$k} ) { $ret{ $R_OPT_MAP->{$k} } = $ret{$k}; } } return \%ret; } sub read_table { my ( $file, %u_opt ) = @_; return _read( $file, _merge_options( {}, \%u_opt ) ); } sub read_csv { my ( $file, %u_opt ) = @_; my $t_opt = { sep_char => q{,}, header => 1, }; return _read( $file, _merge_options( $t_opt, \%u_opt ) ); } sub read_csv2 { my ( $file, %u_opt ) = @_; my $t_opt = { sep_char => q{;}, header => 1, }; return _read( $file, _merge_options( $t_opt, \%u_opt ) ); } sub read_delim { my ( $file, %u_opt ) = @_; my $t_opt = { sep_char => "\t", header => 1, }; return _read( $file, _merge_options( $t_opt, %u_opt ) ); } # check if $file is a filehandle, if not open file with correct encoding. # Then let _parse_fh do the work. sub _read { my ( $file, $opts ) = @_; my $data_ref; if (reftype \$file eq 'SCALAR') { my $encoding = q{}; if (defined $opts->{encoding} && length $opts->{encoding} > 0) { $encoding = ':encoding(' . $opts->{encoding} . ')'; } open my $IN, '<' . $encoding, $file or croak "Cannot open $file for reading: $!"; $data_ref = _parse_fh( $IN, $opts ); close $IN or croak "Cannot close $file: $!"; } else { $data_ref = _parse_fh( $file, $opts ); } return $data_ref; } # parsing of the file in a 2d array, store column and row names. sub _parse_fh { my ( $IN, $opts ) = @_; my @data; my $obj = tie @data, 'Text::CSV::R::Matrix'; my %text_csv_opts = %{$opts}; delete @text_csv_opts{ keys %{$R_OPT_MAP} }; my $csv = Text::CSV->new( \%text_csv_opts ) or croak q{Cannot use CSV: } . Text::CSV->error_diag(); # skip lines my $line_number = 0; while ( $line_number < $opts->{skip} && <$IN> ) { $line_number++; } $line_number = 0; my $max_cols = 0; LINE: while ( my $line = <$IN> ) { chomp $line; # blank_lines_skip option if ( !length($line) && defined $opts->{'blank_lines_skip'} && $opts->{'blank_lines_skip'} ) { next LINE; } my $status = $csv->parse($line) or croak q{Cannot parse CSV: } . $csv->error_input(); push @data, [ $csv->fields() ]; if ( scalar( @{ $data[-1] } ) > $max_cols ) { $max_cols = scalar @{ $data[-1] }; } $line_number++; # nrow option. Store one more because file might contain header. last LINE if ( defined $opts->{nrow} && $opts->{nrow} >= 0 && $line_number > $opts->{nrow} ); } my $auto_col_row = scalar @{$data[0]} == $max_cols - 1 ? 1 : 0; # read column names if ( $auto_col_row || ( defined $opts->{header} && $opts->{header} ) ) { colnames( \@data, shift @data ); } else { colnames( \@data, [ map { 'V' . $_ } 1 .. $max_cols ] ); if ( defined $opts->{nrow} && scalar(@data) > $opts->{nrow} ) { pop @data; } } # read row names my @rownames; if ( $auto_col_row ) { for my $row (@data) { push @rownames, $row->[0]; shift @{$row}; } } elsif ( defined $opts->{row_names} && reftype \$opts->{row_names} eq 'SCALAR' ) { for my $row (@data) { push @rownames, $row->[$opts->{row_names}]; splice @{$row}, $opts->{row_names}, 1; } } else { @rownames = 1 .. scalar @data; } rownames(\@data, \@rownames); return \@data; } 1;