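The following example fetches search results from three sources (Google, MSN and Yahoo!), using one thread per engine. It only prints the raw response from each engine; to extract the actual results you would need to write a custom parser for each engine. You may wish to look at HTML::Parser for this.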
#! /usr/bin/perl -w
use strict;
use warnings;
use LWP;
use threads;
use Thread::Queue;
# Search term appended verbatim to each engine's base URL. It is not
# URL-escaped, so it must not contain characters that need escaping.
my $query = "perl";

# Workers hand their results back to the main thread through this queue.
my $dataQueue = Thread::Queue->new;

# Number of workers whose result has not been collected yet.
my $threadCount = 0;

# Thread objects are kept (rather than detached) so that every worker can be
# joined before the program exits.
my @workers;

# Each DATA line holds "<engine> <base-url>"; start one worker per line.
while (my $line = <DATA>)
{
    chomp $line;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;

    my ($engine, $url) = split /\s+/, $line;
    next unless $url;    # blank line, or a line without a URL

    $url .= $query;
    print "$url\n";

    my $worker = threads->new(\&doSearch, $engine, $url);
    if (!defined $worker)
    {
        # Do not count a worker that never started, otherwise the collection
        # loop below would wait for a result that cannot arrive.
        warn "Could not start a search thread for $engine\n";
        next;
    }

    push @workers, $worker;
    $threadCount++;
}

# doSearch enqueues the engine name and its content with a single enqueue
# call, so the two values are read back as a pair, one pair per worker.
while ($threadCount)
{
    my $engine  = $dataQueue->dequeue;
    my $content = $dataQueue->dequeue;
    print "$engine returned: $content\n";
    $threadCount--;
}

# Reap the workers. Exiting while a thread is still running makes perl warn
# "Perl exited with active threads".
$_->join for @workers;

print "Parse and return remaining content\n";
# doSearch($engine, $url)
#
# Worker-thread body. Fetches $url with LWP::UserAgent and enqueues exactly
# one ($engine, $text) pair on the file-level $dataQueue, where $text is:
#   - the response content when the request succeeds,
#   - the response message when the request fails,
#   - a "request failed: ..." string when the request code itself throws.
sub doSearch
{
    my ($engine, $url) = @_;

    my $text;

    # The main thread waits for one pair per worker. Trap any exception so a
    # worker that dies still reports something instead of leaving the main
    # thread blocked in dequeue forever.
    my $ok = eval {
        my $ua = LWP::UserAgent->new;
        $ua->agent('Mozilla/5.0');
        $ua->timeout(10);
        $ua->env_proxy;    # pick up proxy settings from the environment

        my $response = $ua->get($url);
        $text = $response->is_success
              ? $response->content
              : $response->message;
        1;
    };
    if (!$ok)
    {
        $text = "request failed: " . ($@ || 'unknown error');
    }

    # One enqueue call for both values keeps the pair together in the queue.
    $dataQueue->enqueue($engine, $text);
    return;
}
__DATA__
Google http://www.google.com/search?q=
Yahoo! http://search.yahoo.com/search?p=
MSN http://beta.search.msn.co.uk/results.aspx?q=