If you know the characters you want, just eliminate everything else.
#!perl
use strict;
use warnings;
use HTML::TableExtract;
use LWP::UserAgent ();
# Page to scrape.  The address must be one unbroken string: a line-wrap
# marker ("+" at the start of a continuation line) pasted into the literal
# would become part of the URL.
my $url = 'http://www.nasdaq.com/extended-trading/premarket-mostactive.aspx';

# Column headings handed to HTML::TableExtract to select the wanted tables
# and columns.  Kept exactly as headings, with no stray wrap characters.
my $headers = ['Symbol', 'Last Sale*', 'Change Net / %', 'Share Volume'];

# Fetch the page, honouring proxy settings from the environment and giving
# up after 10 seconds.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
my $response = $ua->get($url);
if ( !$response->is_success ) {
    die $response->status_line;
}
my $htm = $response->decoded_content;

# Each report is built the same way; only the title and the "count" value
# passed to HTML::TableExtract differ, so drive both from one list.
my @reports = (
    [ 'Advances',  4 ],    # table4
    [ 'Decliners', 5 ],    # table5
);
for my $spec (@reports) {
    my ($title, $count) = @$spec;

    # A fresh extractor per report, as each one targets a different table.
    my $table_extract = HTML::TableExtract->new(
        count   => $count,
        headers => $headers,
    );
    my $tbl = $table_extract->parse($htm);
    report($title, cleanup($tbl));
}
# Reduce every cell of a table to the characters of interest.
#
# Argument: an object whose rows() method returns a list of array
#           references, one per table row (the script passes the value
#           returned by HTML::TableExtract's parse()).
# Returns:  a reference to a new array of new row arrays.  In each cell,
#           every character other than A-Z, 0-9, '%', ',', '+', '-' and '.'
#           is replaced by a space, then leading and trailing spaces are
#           removed.  Undefined cells come back as empty strings.
#
# The rows supplied by the caller are left untouched.
sub cleanup {
    my $table = shift;
    my @data  = ();
    for my $row ($table->rows) {
        my @clean;
        for my $cell (@$row) {
            # Work on a copy: the loop variable aliases the caller's row
            # element, so substituting on it directly would rewrite the
            # source table.  Undefined cells become '' to avoid warnings.
            my $text = defined $cell ? $cell : '';
            $text =~ s/[^A-Z0-9%,+.-]/ /g;    # allowable
            $text =~ s/^ +| +$//g;            # trim spaces
            push @clean, $text;
        }
        push @data, \@clean;
    }
    return \@data;
}
# Turn a cleaned cell into a number usable in arithmetic.
# Thousands separators are removed first; the leading numeric part (if any)
# is returned, otherwise 0.
sub _to_number {
    my $text = shift;
    return 0 unless defined $text;
    (my $plain = $text) =~ tr/,//d;
    return $plain =~ /\A\s*([+-]?(?:\d+\.?\d*|\.\d+))/ ? $1 + 0 : 0;
}

# Print a titled, tab-separated report to the currently selected handle.
#
# Arguments:
#   $title - heading printed on its own line before the rows.
#   $data  - reference to an array of rows; each row is an array reference
#            holding symbol, price, "change pct" (space separated) and
#            volume, in that order.
#
# For each row one line is printed with: symbol, '$' . (price - change),
# '$' . price, the percentage and the volume.
sub report {
    my ($title,$data) = @_;
    print "$title\n";
    for my $row (@$data) {
        my ($stock, $openpr, $tmp, $vol) = @$row;

        # The third column carries two values separated by spaces; either
        # may be absent, so default to an empty string before splitting.
        my ($change, $pct) = split / +/, (defined $tmp ? $tmp : '');

        # Prices may contain thousands separators (the cleanup step keeps
        # commas), which plain numeric conversion would truncate at the
        # first comma - convert explicitly before subtracting.
        my $closepr = _to_number($openpr) - _to_number($change);

        my @fields = (
            $stock,
            '$' . $closepr,
            '$' . (defined $openpr ? $openpr : ''),
            $pct,
            $vol,
        );
        print join "\t", map { defined $_ ? $_ : '' } @fields;
        print "\n";
    }
}
poj