Rolling your own can be educational. I did it because I thought that it would be trivial to do. By the time I finished..well I was educated. ;-)
Note that I used the quicksort rather than heapsort because I assumed that the issue is memory usage, and quicksort is easier to do.
#! /usr/bin/perl
use strict;
# Demo usage here
my $sorter = Sort::Limited->new(
comp => sub { $_[0] cmp $_[1] },
limit => 20,
);
for (1..100) {
$sorter->add($_);
}
print join "|", $sorter->extract();
# The silly module - not well tested, etc.
package Sort::Limited;
use Data::Dumper;
use Carp;
sub add {
my $self = shift;
ELEM: foreach my $elem (@_) {
my $node = $self->{data};
my $pos = 0;
while (exists $node->{elem}) {
# Traverse one level more
if ($self->{comp}->($elem, $node->{elem}) < 0) {
$node->{cnt}++;
$node = $node->{left};
}
else {
$pos += 1 + $node->{cnt};
if ($self->{limit} < $pos) {
# Filter data - we are past our limit
$node->{right} = { cnt=>0 };
next ELEM;
}
$node = $node->{right};
}
}
# Fill in this node
$node->{elem} = $elem;
$node->{left} = { cnt=>0 };
$node->{right} = { cnt=>0 };
# And check whether to replace the root node...
if ($self->{limit} < $self->{data}{cnt}) {
$self->{data} = $self->{data}{left};
}
}
}
sub extract {
my $self = shift;
my @results = _extract($self->{data});
my $last = @results < $self->{limit} ? @results : $self->{limit} - 1
+;
return @results[0..$last];
}
sub _extract {
my $node = shift;
if (exists $node->{elem}) {
return (
_extract($node->{left}),
$node->{elem},
_extract($node->{right}),
);
}
else {
return ();
}
}
sub new {
my ($class, %args) = @_;
my $self = {};
$self->{comp} = delete $args{comp} || sub {$_[0] cmp $_[1]};
$self->{limit} = delete $args{limit} || 10;
if (%args) {
$Data::Dumper::Indent = 1;
confess ("Unprocessed arguments: ", Dumper(\%args));
}
$self->{data} = {
cnt => 0,
};
return bless $self, $class;
}