If you don't want to use the Google API and would rather parse the web page results directly, here's my code:
use strict;
use IO::Socket::INET;
# Maximum number of search results to print.
my $limit = 5;

# Join every command-line word into a single search phrase so that
# "perl file.pl several words here" searches for all of them rather than
# only the first one. Called with plain sub syntax instead of the legacy
# &-sigil form.
google_search(join(' ', @ARGV));
# google_search(@keywords)
#
# Fetches a Google results page over a raw TCP socket and prints up to
# $limit (file-level variable) hits to STDOUT, each as
#     "N) title"
#     " link"
# All arguments are joined with single spaces to form the search phrase.
#
# Prints "no doc found, sorry" when Google reports no matches or when no
# result line could be recognised in the response.
#
# Dies with "no keywords\n" when called without a usable keyword and with
# "error connecting to the server\n" when the TCP connection fails.
#
# The result pattern is tied to one specific revision of Google's HTML
# markup and has to be adjusted whenever that markup changes.
# NOTE(review): this speaks plain HTTP on port 80; the server may answer
# with a redirect to HTTPS instead of a results page -- confirm.
sub google_search {
    use warnings;    # lexically scoped; the enclosing file only enables strict

    # Use every argument, not just the first, so that both
    # google_search("a b") and google_search(@ARGV) search the full phrase.
    my $keyword = join ' ', grep { defined($_) && length($_) } @_;
    if ($keyword eq '') { die("no keywords\n"); }

    my $socket = IO::Socket::INET->new(
        Proto    => "tcp",
        PeerAddr => "www.google.com",
        PeerPort => 80,
        Timeout  => 3,
    );
    if (!$socket) { die("error connecting to the server\n"); }
    $socket->autoflush(1);

    # Percent-encode everything outside the URL-safe set so characters such
    # as '&', '#', '+' or CR/LF cannot corrupt the query string or the
    # request itself; spaces are sent as '+'.
    my $query = $keyword;
    $query =~ s/([^A-Za-z0-9_.~ -])/sprintf('%%%02X', ord($1))/ge;
    $query =~ tr/ /+/;

    # "Connection: close" makes the server end the stream after the page,
    # so the read loop below terminates even when fewer than $limit results
    # are found (Timeout above only covers the connect).
    print {$socket} join("\r\n",
        "GET /search?hl=en&ie=ISO-8859-1&q=$query HTTP/1.1",
        "Host: www.google.com",
        "User-Agent: Mozilla/5.0",
        "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*",
        "Accept-Language: en-us,en;q=0.5",
        "Connection: close",
        "", "");

    # \Q..\E keeps regex metacharacters in the keyword literal.
    my $nodoc_re =
        qr{^<br><br>Your search - \Q$keyword\E - did not match any documents\.};

    # One result entry: capture 1 is the link, capture 2 is the title.
    my $lead      = qr{(?:</blockquote>|<div>|</a></font> )};
    my $trail     = qr{(?:<br>)?<font size=-1>(?:[^<]| - \[ | \.\.\.|<i>|<span class=f>)};
    my $result_re = qr{$lead<p class=g><a href=(\S+)>(.*?)</a>$trail};

    # Decoded in a single pass so "&amp;quot;" becomes "&quot;", not '"'.
    my %char_for = (
        amp   => '&',
        quot  => '"',
        lt    => '<',
        gt    => '>',
        '#39' => "'",
    );

    my $idx   = 0;
    my $nodoc = 0;

    while (my $buffer = <$socket>) {
        # Normalise the line: trim, squeeze runs of spaces, drop bold tags.
        $buffer =~ s/^\s+|\s+$//g;
        $buffer =~ tr/ //s;
        $buffer =~ s{</?b>}{}g;

        if (!$idx && $buffer =~ $nodoc_re) {
            print STDOUT "no doc found, sorry\n";
            $nodoc = 1;
            last;
        }

        next unless $buffer =~ $result_re;
        my ($link, $desc) = ($1, $2);

        # Skip entries without a title instead of pairing their link with
        # the title of a later hit.
        next if $desc eq '';

        $desc =~ s/&(amp|quot|lt|gt|#39);/$char_for{$1}/g;

        last if ++$idx > $limit;
        print STDOUT "$idx) $desc\n";
        print STDOUT " $link\n";
    }

    close($socket);

    if (!$idx && !$nodoc) { print STDOUT "no doc found, sorry\n"; }
    return;
}
1;
Google's output page varies depending on the results, so you may have to adjust the regexes to match it.
Run it using: perl file.pl your keywords here
Hope this helps.
P.S.: Google will change its web design soon, so this code may not work with the new one; I'll have to try it.
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.