I prepared a test script which works for me, and transfers only as much of the image as necessary to extract the metadata:
#! /usr/bin/perl -w
#
# File: url_info
#
# Author: Phil Harvey
#
# Syntax: url_info URL
#
# Description: test to get image info from image on the net
#
# Example: url_info http://owl.phy.queensu.ca/~phil/big.jpg
#
# References: Based on web crawler script:
# http://www.linuxjournal.com/files/linuxjournal.com/lin
+uxjournal/articles/022/2200/2200l1.html
#
use strict;
use Image::ExifTool;
sub url_info($);
my $DEBUG = 0; # set to 1 for debugging network stuff
my $url = shift or die "Syntax: url_info URL\n";
my $exifTool = new Image::ExifTool;
$exifTool->Options(FastScan => 1);
my $info = url_info($url);
die "No image info for $url\n" unless $info;
foreach (sort keys %$info) {
print "$_ => $$info{$_}\n";
}
exit 1;
#---------------------------------------------------------------------
# Get the page at specified URL
# Inputs: 0) URL
# Returns: 0)
#---------------------------------------------------------------------
sub url_info($)
{
my $url = shift;
my ($protocol, $host, $port, $document) =
$url =~ m|^([^:/]*)://([^:/]*):*([0-9]*)/*([^:]*)$|;
# Some constants used to access the TCP network.
my $AF_INET=2;
my $SOCK_STREAM=1;
# Use default http port if none specified.
$port = 80 unless $port;
# Get the protocol number for TCP.
my ($name,$aliases,$proto) = getprotobyname("tcp");
# Get the IP addresses for the two hosts.
my ($type,$len,$thataddr);
($name,$aliases,$type,$len,$thataddr) = gethostbyname($host);
# Check we could resolve the server host name.
return undef unless defined $thataddr;
my ($a,$b,$c,$d) = unpack('C4', $thataddr);
if (not defined $d or
($a eq '' && $b eq '' && $c eq '' && $d eq ''))
{
warn "Unknown host $host.\n";
return undef;
}
print "Server: $host ($a.$b.$c.$d)\n" if $DEBUG;
# Pack the AF_INET magic number, the port, and the (already
# packed) IP addresses into the same format as the C structure
# would use. Note this is architecture dependent: this pack format
# works for 32 bit architectures.
my $that = pack("S n a4 x8", $AF_INET, $port, $thataddr);
# Create the socket and connect.
unless (socket(S, $AF_INET, $SOCK_STREAM, $proto)) {
warn "Cannot create socket.\n";
alarm 0;
return undef;
}
print "Socket OK\n" if $DEBUG;
local $SIG{ALRM} = sub { die "ALARM\n" };
alarm 3600; # set timeout of 1 hour
my $result = eval 'connect(S, $that)';
if ($@ or not $result) {
warn "Cannot connect to server $host, port $port.\n";
alarm 0;
return undef;
}
print "Connect OK\n" if $DEBUG;
# Turn buffering in the socket off, and send request to the server
select(S); $| = 1; select(STDOUT);
$result=eval 'print S "GET /$document HTTP/1.1\nHost: $host\n\n"';
if ($@ or not $result) {
warn "Timeout when sending to $host, port $port.\n";
alarm 0;
return undef;
}
# Receive the response. Check to ensure the response is of MIME
# type text/html or text/plain.
my $header = 1;
my $header_text = "";
for (;;) {
$_ = eval '<S>';
if ($@) {
warn "Timeout when reading from $host, port $port\n";
last;
}
last unless defined $_;
# Check if we've hit the end of the HTTP header (empty line)
# If we have, check for a content-type header line, and ensure
# it is valid.
if( m|^[\n\r]*$| ){
$header = 0;
my ($content) = $header_text=~/Content-type: ([^\s;]+)/i;
if ($content and $content =~ m{^image/(.*)}) {
# extract image metadata
my $raf = new File::RandomAccess(\*S);
$info = $exifTool->ImageInfo($raf);
} else {
warn "Not an image\n";
}
last; # all done
} elsif($header == 1){
# Save to a header string if we're still working on the
# HTTP header.
$header_text .= " " . $_;
}
}
eval 'close S';
alarm 0;
print "HTTP header: \n $header_text" if $DEBUG;
return $info;
}
- Phil |