http://qs321.pair.com?node_id=446298
Category: Text Processing
Author/Contact Info Rudif Rudi Farkas
Description: hugepad is a viewer for huge text files, where huge is up to 500 MB. Advice and help from perlmonks BrowserUk and zentara is gladly acknowledged. See also recent threads Displaying/buffering huge text files and perl Tk::Scrolled : how do I hijack existing subwidget bindings?.

#!/usr/bin/perl -w
#
# hugepad is a viewer for huge (*) text files, by Rudi Farkas (C) 2005
#
# (*) tested up to 500 MB
#

use strict;

######################################################################
+############

package FileIndexer;

# opens a text file, indexes file lines and fetches them on demand

our $VERSION = 0.1;

# requires filename of a text file of a reasonable size
# opens and indexes the file
sub new {
    my ( $class, %args ) = @_;
    my $self = {
        file  => $args{file}  || '',
        debug => $args{debug} || 0,
    };
    bless $self, $class;
    open( my $in, $self->{file} ) or die "Couldn't read $self->{file}:
+ $!";
    $self->{filehandle} = $in;
    $self->_indexFileLines();
    return $self;
}

# requires 0-based indexes of first line and of last line to fetch fro
+m file
# returns the array of lines requested
sub getLines {
    my $self = shift;
    my $i    = shift || 0;
    my $k    = shift;
    $k = $i unless defined $k;
    my @lines;
    my $fh = $self->{filehandle};
    $i = 0 if $i < 0;
    my $numlines = $self->numLines();
    $k = $numlines - 1 if $k >= $numlines;

    for my $j ( $i .. $k ) {
        my $linestart = unpack( 'd', substr $self->{index}, $j * 8, 8 
+);
        last unless defined $linestart;
        my $ok = seek $fh, $linestart, 0;
        chomp( $_ = <$fh> );
        push @lines, $_;
    }
    printf STDERR "getLines $i..$k, got %d\n", @lines + 0 if $self->{d
+ebug};
    return @lines;
}

# returns the number of lines in the file
sub numLines {
    my $self = shift;
    return length( $self->{index} ) / 8;
}

# indexes the file lines
sub _indexFileLines {
    my $self = shift;
    my $fh   = $self->{filehandle};
    seek $fh, 0, 0;
    my @index = ( pack 'd', 0 );
    push @index, pack 'd', tell $fh while <$fh>;
    pop @index;
    $self->{index} = join '', @index;
}

1;

######################################################################
+############

package Inserter;

# inserts lines fetched from indexed text file into a text widget

our $VERSION = 0.1;

sub min { $_[0] < $_[1] ? $_[0] : $_[1]; }
sub max { $_[0] > $_[1] ? $_[0] : $_[1]; }

# clip a,b,c
# requires a <= c
# returns b clipped to limits a, c
sub clip { max( $_[0], min( $_[1], $_[2] ) ) }

# requires a Tk application that provides a text widget
# and suitable inserting operations (doc TBD)
sub new {
    my $class = shift;
    my %args  = @_;
    my $self  = {
        app            => $args{app},
        debug          => $args{debug} || 0,
        fileindexed    => undef,              # initially
        debug          => 0,
        text_maxlines  => 1001,               # max number of lines in
+ the text widget
        text_pagelines => 100,                # page size for medium s
+peed scrolling
        text_begin     => 0,                  # 0-based index into fil
+e, of the first file line currently in text widget
        text_end => 0,    # 0-based index into file, of the file line 
+after the last line currently in text widget

    };
    bless $self, $class;
}

# requires a filename
# opens the file, indexes it and inserts the first page into the app's
+ text widget
sub openFile {
    my $self = shift;
    my $file = shift;
    if ( -f $file ) {
        $self->{fileindexed} = undef;                               # 
+closes the open file if any
        $self->{fileindexed} = FileIndexer->new( file => $file );
        $self->{text_begin}  = 0;                                   # 
+0-based
        $self->{text_end}    = 0;                                   # 
+0-based, after the last line gotten
        $self->{app}{text}->delete( '1.0', 'end' );
        $self->appendLines( $self->{text_maxlines} );
    }
    else {

        # TODO report error
    }
}

# returns the number of lines currently in the text widget
sub textlines {
    my $self = shift;
    return $self->{text_end} - $self->{text_begin};
}

# returns the number of file lines
sub filelines {
    my $self = shift;
    if ( defined $self->{fileindexed} ) {
        return $self->{fileindexed}->numLines();
    }
    else {
        return 0;
    }
}

# requires number of lines to delete starting at the first line in the
+ text widget
# deletes specified lines from the text widget
sub deleteFirstLines {
    my $self       = shift;
    my $chunklines = shift || $self->{text_pagelines};
    my $n          = min( $chunklines, $self->textlines() );
    $self->{app}{text}->delete( '1.0', "1.0 + $n lines" );
    $self->{text_begin} += $n;
    print STDERR "deleteFirstLines $n, now: $self->{text_begin}.,$self
+->{text_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

# requires number of lines to delete ending with the last line in the 
+text widget
# deletes specified lines from the text widget
sub deleteLastLines {
    my $self       = shift;
    my $chunklines = shift || $self->{text_pagelines};
    my $n          = min( $chunklines, $self->textlines() );
    my $from       = $self->textlines() - $n;
    $self->{app}{text}->delete( "$from.0", 'end' );
    $self->{text_end} -= $n;
    print STDERR "deleteLastLines $n, now: $self->{text_begin}.,$self-
+>{text_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

# requires number of lines to insert after the last line in the text w
+idget
# if needed deletes some first lines from the text widget
# inserts the lines
sub appendLines {
    my $self = shift;
    unless ( defined $self->{fileindexed} ) {
        print STDERR "no file opened\n";
        return;
    }
    my $chunklines = shift || $self->{text_pagelines};

    my $tailmargin = $self->filelines() - $self->{text_end};
    $chunklines = min( $chunklines, $tailmargin );
    $chunklines = min( $chunklines, $self->{text_maxlines} );
    print STDERR "appendLines chunklines $chunklines\n" if $self->{deb
+ug};

    if ( $self->textlines() + $chunklines > $self->{text_maxlines} ) {
        $self->deleteFirstLines($chunklines);
    }

    my $new_end   = $self->{text_end} + $chunklines;
    my $new_begin = $new_end - min( $chunklines, $self->{text_maxlines
+} );

    my @lines = $self->{fileindexed}->getLines( $new_begin, $new_end -
+ 1 );

    for (@lines) {
        $self->{app}{text}->insert( 'end', "$_\n" );
    }
    $self->{text_end} += @lines;
    my $n = @lines;
    print STDERR "appendLines $n, now: $self->{text_begin}.,$self->{te
+xt_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

# requires number of lines to prepend before the first line in the tex
+t widget
# if needed deletes some last lines from the text widget
# inserts the lines
sub prependLines {
    my $self = shift;
    unless ( defined $self->{fileindexed} ) {
        print STDERR "no file opened\n";
        return;
    }
    my $chunklines = shift || $self->{text_pagelines};
    $chunklines = min( $chunklines, $self->{text_begin} );
    $chunklines = min( $chunklines, $self->{text_maxlines} );
    print STDERR "prependLines chunklines $chunklines\n" if $self->{de
+bug};

    if ( $self->textlines() + $chunklines > $self->{text_maxlines} ) {
        $self->deleteLastLines($chunklines);
    }

    my $new_begin = $self->{text_begin} - $chunklines;
    my $new_end   = $new_begin + min( $chunklines, $self->{text_maxlin
+es} );

    my @lines = $self->{fileindexed}->getLines( $new_begin, $new_end -
+ 1 );
    for ( my $i = $#lines ; $i >= 0 ; --$i ) {
        $self->{app}{text}->insert( '1.0', "$lines[$i]\n" );
    }
    $self->{text_begin} -= @lines;
    my $n = @lines;
    print STDERR "prependLines $n, now: $self->{text_begin}.,$self->{t
+ext_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

sub replaceLines {
    my $self = shift;
    my $from = shift || 0;
    my $upto = shift || $self->filelines();    # not including the las
+t one

    # clip to maxlines preserving $chunklines if possible
    my $chunklines = $upto - $from;
    $chunklines = max( 0, $chunklines );
    $chunklines = min( $chunklines, $self->{text_maxlines} );
    $from = clip( 0, $from, $self->filelines() - $self->{text_maxlines
+} - 1 );

    # for now ignore any overlap
    $self->deleteFirstLines( $self->textlines() );

    $self->{text_begin} = $from;
    my @lines = $self->{fileindexed}->getLines( $self->{text_begin}, $
+self->{text_begin} + $chunklines - 1 );
    for (@lines) {
        $self->{app}{text}->insert( 'end', "$_\n" );
    }
    $self->{text_end} = $self->{text_begin} + @lines;
    my $n = @lines;
    print STDERR "replaceLines $n, now: $self->{text_begin}.,$self->{t
+ext_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

# updates the message in label and the second scrollbar
sub updateMessageAndScrollbar2 {
    my $self    = shift;
    my $message = shift;
    if ( defined $message ) {
        ${ $self->{app}{messageText} } = $message;
    }
    else {
        my $filelines = 0;
        if ( defined $self->{fileindexed} ) {
            $filelines = $self->{fileindexed}->numLines();
        }
        my $text_last = $self->{text_end} - 1;
        my $textlines = $self->textlines();
        ${ $self->{app}{messageText} } = "$self->{text_begin} .. $text
+_last ($textlines of $filelines)";
    }

    # update scr2
    my $lines = $self->{text_end} - $self->{text_begin};
    if ( $lines > $self->{text_pagelines} ) {
        my $num = $self->{fileindexed}->numLines();
        if ( $num > 0 ) {
            my $fr1 = $self->{text_begin} / $num;
            my $fr2 = $self->{text_end} / $num;
            $self->{app}{scr2}->set( $fr1, $fr2 );
        }
    }
}

1;

######################################################################
+############

package main;

use Tk;
use Tk::ROText;
use Tk::DialogBox;

#use Data::Dumper;

our $VERSION = 0.11;

# create the main window, a menu frame at top, a second scrollbar at r
+ight and a scrolled ROText
my $app;
$app->{mw} = MainWindow->new( -title => "hugepad" );
$app->{menu} = $app->{mw}->Frame()->pack( -side     => "top",   -fill 
+=> "x" );
$app->{scr2} = $app->{mw}->Scrollbar()->pack( -side => 'right', -fill 
+=> 'y' );
$app->{scr2}->configure( -command => \&OnScroll2, -activerelief => 'gr
+oove' );
$app->{text} =
  $app->{mw}->Scrolled( "ROText", -scrollbars => 'se', -wrap => 'none'
+ )->pack( -fill => 'both', -expand => 1 );

# add menus and dialogs
{
    $app->{menu}{file} =
      $app->{menu}->Menubutton( -text => 'File', -underline => 0, -tea
+roff => 0 )->pack( -side => 'left' );

    $app->{menu}{file}->command(
        -label   => 'Open',
        -command => sub {
            $app->{text}->delete( '1.0', 'end' );

            my $types = [ [ 'Text Files', '.txt' ], [ 'All Files', '*.
+*', ] ];
            my $file2open = $app->{mw}->getOpenFile( -filetypes => $ty
+pes );
            openFile($file2open);
        }
    );

    $app->{menu}{file}->separator;

    $app->{menu}{file}->command(
        -label   => 'Exit',
        -command => sub {
            exit(0);
        }
    );
}
{
    $app->{dlg}{about} = $app->{mw}->DialogBox( -title => "About", -bu
+ttons => ["OK"] );
    $app->{dlg}{about}->add( "Label", -text => "hugepad\na viewer for 
+huge text files\nby Rudif" )->pack;

    $app->{dlg}{help} = $app->{mw}->DialogBox( -title => "Help", -butt
+ons => ["OK"] );
    $app->{dlg}{help}
      ->add( "Label", -text => "Open a text file.\nScroll the text usi
+ng vertical arrow and page keys, mouse wheel and scrollbars." )->pack
+;

    $app->{menu}{about} =
      $app->{menu}->Menubutton( -text => 'Help', -underline => 0, -tea
+roff => 0 )->pack( -side => 'left' );

    $app->{menu}{about}->command( -label => 'About', -command => sub {
+ $app->{dlg}{about}->Show; } );
    $app->{menu}{about}->command( -label => 'Help',  -command => sub {
+ $app->{dlg}{help}->Show; } );
}
{
    my $messageText = "   ";
    $app->{messageLabel} =
      $app->{menu}->Label( -textvariable => \$messageText, -relief => 
+'sunken' )->pack( -side => 'right' );
    $app->{messageText} = \$messageText;
}

# add Inserter
my $ins = Inserter->new( app => $app );

# add bindings for MouseWheel and for the Y arrow keys
$app->{text}->bind( "<MouseWheel>", \&OnYscrolllimit );
$app->{text}->bind( "<Key-Up>",     \&OnYarrowlimit );
$app->{text}->bind( "<Key-Down>",   \&OnYarrowlimit );

# add bindings for PgDown and PgUp keys
$app->{text}->bind( "<Key-Next>",  [ sub { $ins->appendLines(); },  Ev
+('K') ] );
$app->{text}->bind( "<Key-Prior>", [ sub { $ins->prependLines(); }, Ev
+('K') ] );

# uncomment for testing only
#$app->{text}->bind( "<Shift-Key-Next>",  [ sub { $ins->appendLines();
+ }, Ev('K') ] );
#$app->{text}->bind( "<Shift-Key-Prior>", [ sub { $ins->prependLines()
+; }, Ev('K') ] );
#$app->{text}->bind( "<Key-Home>", [ sub { $ins->deleteFirstLines(); }
+, Ev('K') ] );
#$app->{text}->bind( "<Key-End>",  [ sub { $ins->deleteLastLines(); },
+  Ev('K') ] );
#$app->{text}->bind( "<Key-Home>", [ sub { $ins->replaceLines( 4000, 4
+400 ); }, Ev('K') ] );
#$app->{text}->bind( "<Key-End>",  [ sub { $ins->replaceLines( 8000, 8
+800 ); }, Ev('K') ] );

# redefine the scrollbar's callback that tells the Text to scroll
$app->{text}->Subwidget("yscrollbar")->configure( -command => \&scroll
+callback, );

# opens the file given as command line option
my $file = shift;
if ( defined $file ) {

    $ins->openFile($file);
}

# here we go
MainLoop;

### subs

# requires a filename
# opens the file in Indexer and loads the first maxlines into the Text
+ widget
sub openFile {
    my $file = shift;
    $ins->openFile($file);
}

# callback for the inner scrollbar
# scrolls the current contents of the Text widget
sub scrollcallback {

    # scrollbar tells Text widget to scroll or moveto
    $app->{text}->yview(@_);

    OnYscrolllimit();    # additional behavior
}

# on reaching the scroll limit loads another line into the Text widget
sub OnYscrolllimit {
    my ( $top, $bot ) = $app->{text}->yview;
    if ( $top == 0 ) {
        $ins->prependLines(1);    # wheel or scrollbar try to go above
+ the first line
    }
    elsif ( $bot == 1 ) {
        $ins->appendLines(1);     # wheel or scrollbar try to go below
+ the last line
    }
}

# on arrow cursor hitting the first or last line in Text widget loads 
+another line
sub OnYarrowlimit {
    my $i = int( $app->{text}->index('insert') );
    my $e = int( $app->{text}->index('end') );
    if ( $i == 1 ) {
        $ins->prependLines(1);    # up arrow hits the first line
    }
    elsif ( $i == $e - 1 ) {
        $ins->appendLines(1);     # down arrow hits the last line
    }
}

# requires a scroll command and a quantity to scroll to or scroll by
sub OnScroll2 {
    my ( $cmd, $qty ) = @_;
    if ( $cmd eq 'moveto' ) {

        # $qty (0..1) designates the relative position in file
        # of the first line to load into the text widget
        my $new_begin = int( $qty * $ins->filelines() );
        my $new_end   = $new_begin + $ins->{text_maxlines};
        $ins->replaceLines( $new_begin, $new_end );
    }
    elsif ( $cmd eq 'scroll' ) {

        #scroll by 1 page
        if ( $qty == -1 ) {
            $ins->prependLines();
        }
        elsif ( $qty == 1 ) {
            $ins->appendLines();
        }
    }
}

__END__


=head1 NAME

hugepad.pl - Perl/Tk program for viewing huge text files

=head1 SYNOPSIS

  hugepad [somehugefile.txt]

=head1 DESCRIPTION

hugepad opens a text file of up to several hundred megabytes 
for viewing in a Tk ROText window.

You can scroll the file contents using vertical arrow keys, mouse whee
+l (Windows only?), 
Page Up/Down keys and two vertical scrollbars, for fine and coarse (ra
+pid) scrolling.

It has no editing capability at this time.

=head1 VERSION

Preliminary. Comments and suggestions for improvement are welcome.

=head1 TODO

=over 4

=item * make pagesize dynamic ?

=item * goto line

=item * find

=item * filter lines with a regexp and save

=back

=head1 ACKNOWLEDGEMENTS

[BrowserUk] of perlmonks for the file indexing algorithm

[zentara] of perlmonks for advice on Tk widget method rebinding

Paul Malcher for kpad

Steve Hancock for perltidy

=head1 BUGS

No unit tests

Tested only on WinXP

Error handling is rough

=head1 FIXED BUGS

=over 4

=item initial dir in getOpenFile

I< The script has 'h:\' as initial dir, which seems silly for *NIX mac
+hines and even for most Win32 machines that have no h:\ 'share' mount
+ed.>

Removed the option -initialdir from getOpenFile() call.

=back



=head1 AUTHOR

Rudi Farkas, [Rudif] of perlmonks

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2005 by Rudi Farkas

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.4 or,
at your option, any later version of Perl 5 you may have available.


=cut