Perhaps you can adapt this script, which writes the header and footer text of each worksheet in an .xlsx file to a CSV file:
#!perl
use strict;
use Archive::Zip::MemberRead;
use XML::Twig;
use Text::CSV;
use warnings;

# Main script: lists the worksheet members of the .xlsx archive, extracts the
# header/footer text of each one and writes one CSV row per sheet.

# Called with no separator argument. process_sheet reads each sheet with a
# single getline() call, so this presumably makes getline() return the whole
# member at once -- confirm against the Archive::Zip::MemberRead docs.
Archive::Zip::MemberRead->setLineEnd();

my $csv = Text::CSV->new( { binary => 1, eol => "\012" } )
    or die "Cannot use CSV: " . Text::CSV->error_diag();

my $infile  = 'header1.xlsx';
my $outfile = 'results.csv';

# Header/footer text may contain non-ASCII characters, so encode explicitly
# instead of relying on perl's default byte output.
open my $fh_out, '>:encoding(UTF-8)', $outfile
    or die "Could not open $outfile: $!";

# Worksheet member names, indexed by the sheet number found in the name.
my @sheet;
my $zip = Archive::Zip->new($infile)
    or die "Could not read $infile as a zip archive";
for my $name ( $zip->memberNames() ) {
    # Require "sheetN.xml" to be a whole path component with a literal dot,
    # so names such as "mysheet1.xml" or "sheet1_xml" are not picked up.
    if ( $name =~ m{(?:\A|/)sheet(\d+)\.xml\z} ) {
        $sheet[$1] = $name;
    }
}

my @fields = ( 'oddHeader', 'evenHeader', 'oddFooter', 'evenFooter' );
$csv->print( $fh_out, [ $infile, scalar localtime() ] );
$csv->print( $fh_out, [ 'Sheet', @fields ] );

# Filled by get_text() while a sheet is parsed; keyed by element name.
my %text = ();
for my $no ( 1 .. $#sheet ) {
    # Sheet numbers can have gaps (e.g. sheet1.xml and sheet3.xml only).
    next unless defined $sheet[$no];

    %text = ();
    process_sheet( $sheet[$no] );

    # Fields missing from the sheet stay undef and are written as empty.
    my @row = map { $text{$_} } @fields;
    $csv->print( $fh_out, [ $no, @row ] );
}

# Buffered write errors only surface at close, so report them with context.
close $fh_out or die "Could not close $outfile: $!";
print "Results written to $outfile\n";
# process_sheet( $member_name )
#
# Reads one worksheet XML member from the archive held in the file-level
# $zip and parses it with XML::Twig, calling get_text() for every
# <headerFooter> element found.
#
# Dies if no member name is given, if the archive has no such member, or if
# nothing could be read from it. Returns whatever $twig->parse returns.
sub process_sheet {
    my $filename = shift;
    die "process_sheet: no member name given"
        unless defined $filename;
    print "Extracting text from : $filename\n";

    # Fail early with a clear message rather than inside the reader.
    die "No member named '$filename' in archive"
        unless defined $zip->memberNamed($filename);

    # A single getline() is used to fetch the member; this relies on the
    # line separator having been cleared with setLineEnd() at start-up.
    my $fh_in = Archive::Zip::MemberRead->new( $zip, $filename );
    my $xml   = $fh_in->getline();
    $fh_in->close();
    die "Could not read '$filename' from archive"
        unless defined $xml;

    # Only <headerFooter> subtrees are built; the rest of the sheet is skipped.
    my $twig = XML::Twig->new(
        keep_spaces => 1,
        twig_roots  => { 'headerFooter' => \&get_text },
    );
    return $twig->parse($xml);
}
# get_text( $twig, $element )
#
# XML::Twig handler for a <headerFooter> element. Stores the text of each
# child in the file-level %text hash under the child's element name, with
# the &L / &C / &R position codes replaced by a single space.
sub get_text {
    my ( $t, $elt ) = @_;
    foreach my $child ( $elt->children ) {
        # Copy the child's text, then blank out left/center/right codes.
        ( my $cleaned = $child->text ) =~ s/&[LCR]/ /g;
        $text{ $child->name } = $cleaned;
    }
}
poj |