FWIW. Here is the code I mentioned earlier. It just completed a run looking for pairs of atoms that are within .01 units of each other.
The input was 1,000,000 atom coordinates randomly generated in file that looks like this
atom000001 : 0119.110 0443.939 0146.027
atom000002 : -217.194 -175.476 -200.134
atom000003 : 0202.789 -383.911 0183.319
atom000004 : -459.869 0354.187 -421.509
atom000005 : 0446.625 0097.504 0243.835
It found 308,822 pairs within the requisite distance of one another (from the 1,000,000,000,000 possibles) in just under 7 hours on 2Ghz machine.
Whether the technique is adaptable to your application I'm not sure.
#! perl -slw
use strict;
use Data::Dumper;
use Benchmark::Timer;
my $T = new Benchmark::Timer;
our $CUTOFF ||= 10;
our $SCALE ||= 500;
my %atoms;
sub fullCheck{
my( $x1, $y1, $z1 ) = shift() =~ m[:\s+(\S+)\s+(\S+)\s+(\S+)];
my( $x2, $y2, $z2 ) = shift() =~ m[:\s+(\S+)\s+(\S+)\s+(\S+)];
return 1
if ( $x1 - $x2 )**2 + ( $y1 - $y2 )**2 + ( $z1 - $z2 )**2 <= $
+CUTOFF**2;
return 0
}
$T->start( 'reading' );
warn 'Reading';
open IN, '<', $ARGV[ 0 ] or die "$ARGV[ 0 ] : $!";
while( <IN> ) {
chomp;
my( $x, $y, $z ) = m[:\s+(\S+)\s+(\S+)\s+(\S+)];
# Adjust the floating coordinates to integer values.
$x = int( $x + 500 );
$y = int( $y + 500 );
$z = int( $z + 500 );
$atoms{ sprintf '%04d%04d%04d', $x, $y, $z } = $_;
}
close IN;
$T->stop( 'reading' );
$T->start( 'sorting' );
warn 'Sorting';
my @posns = sort{ $a <=> $b } keys %atoms;
$T->stop( 'sorting' );
my $delta = sprintf '%03d%03d%03d', ( $CUTOFF ) x 3;
my %pairs;
my( $lower, $upper ) = ( 0 ) x 2;
$T->start( 'searching' );
warn 'Searching';
for( my $current = 0; $current < @posns; $current++ ) {
printf STDERR "\r$current %s", scalar localtime unless $current %
+ 1000;
my $lValue = $posns[ $current ] - $delta;
my $uValue = $posns[ $current ] + $delta;
$lower++ while $posns[ $lower ] < $lValue;
$upper++ while $upper < $#posns and $posns[ $upper ] < $uValue;
for my $p1 ( $lower .. $upper-1 ) {
next if $p1 == $current;
my $possible = join' / ', @atoms{ @posns[ $current, $p1 ] };
next if exists $pairs{ $possible };
if( fullCheck( @atoms{ @posns[ $current, $p1 ] } ) ) {
$pairs{ $possible } = undef;
}
}
}
$T->stop( 'searching' );
print for keys %pairs;
$T->report;
__END__
P:\test>352838-2 352838.1MB >352838.1MB.out
Reading at P:\test\352838-2.pl line 21.
Sorting at P:\test\352838-2.pl line 36.
Searching at P:\test\352838-2.pl line 45.
999000 Wed Jun 16 20:57:47 20041 trial of reading (14.678s total)
1 trial of sorting (18.649s total)
1 trial of searching (23,438s total)
Examine what is said, not who speaks.
"Efficiency is intelligent laziness." -David Dunham
"Think for yourself!" - Abigail
"Memory, processor, disk in that order on the hardware side. Algorithm, algoritm, algorithm on the code side." - tachyon