http://qs321.pair.com?node_id=446298
Category: Text Processing
Author/Contact Info Rudif Rudi Farkas
Description: hugepad is a viewer for huge text files, where huge is up to 500 MB. Advice and help from perlmonks BrowserUk and zentara is gladly acknowledged. See also recent threads Displaying/buffering huge text files and perl Tk::Scrolled : how do I hijack existing subwidget bindings?.

#!/usr/bin/perl -w
#
# hugepad is a viewer for huge (*) text files, by Rudi Farkas (C) 2005
#
# (*) tested up to 500 MB
#

use strict;

######################################################################
+############

package FileIndexer;

# opens a text file, indexes file lines and fetches them on demand

our $VERSION = 0.1;

# requires filename of a text file of a reasonable size
# opens and indexes the file
sub new {
    my ( $class, %args ) = @_;
    my $self = {
        file  => $args{file}  || '',
        debug => $args{debug} || 0,
    };
    bless $self, $class;
    open( my $in, $self->{file} ) or die "Couldn't read $self->{file}:
+ $!";
    $self->{filehandle} = $in;
    $self->_indexFileLines();
    return $self;
}

# requires 0-based indexes of first line and of last line to fetch fro
+m file
# returns the array of lines requested
sub getLines {
    my $self = shift;
    my $i    = shift || 0;
    my $k    = shift;
    $k = $i unless defined $k;
    my @lines;
    my $fh = $self->{filehandle};
    $i = 0 if $i < 0;
    my $numlines = $self->numLines();
    $k = $numlines - 1 if $k >= $numlines;

    for my $j ( $i .. $k ) {
        my $linestart = unpack( 'd', substr $self->{index}, $j * 8, 8 
+);
        last unless defined $linestart;
        my $ok = seek $fh, $linestart, 0;
        chomp( $_ = <$fh> );
        push @lines, $_;
    }
    printf STDERR "getLines $i..$k, got %d\n", @lines + 0 if $self->{d
+ebug};
    return @lines;
}

# returns the number of lines in the file
sub numLines {
    my $self = shift;
    return length( $self->{index} ) / 8;
}

# indexes the file lines
sub _indexFileLines {
    my $self = shift;
    my $fh   = $self->{filehandle};
    seek $fh, 0, 0;
    my @index = ( pack 'd', 0 );
    push @index, pack 'd', tell $fh while <$fh>;
    pop @index;
    $self->{index} = join '', @index;
}

1;

######################################################################
+############

package Inserter;

# inserts lines fetched from indexed text file into a text widget

our $VERSION = 0.1;

sub min { $_[0] < $_[1] ? $_[0] : $_[1]; }
sub max { $_[0] > $_[1] ? $_[0] : $_[1]; }

# clip a,b,c
# requires a <= c
# returns b clipped to limits a, c
sub clip { max( $_[0], min( $_[1], $_[2] ) ) }

# requires a Tk application that provides a text widget
# and suitable inserting operations (doc TBD)
sub new {
    my $class = shift;
    my %args  = @_;
    my $self  = {
        app            => $args{app},
        debug          => $args{debug} || 0,
        fileindexed    => undef,              # initially
        debug          => 0,
        text_maxlines  => 1001,               # max number of lines in
+ the text widget
        text_pagelines => 100,                # page size for medium s
+peed scrolling
        text_begin     => 0,                  # 0-based index into fil
+e, of the first file line currently in text widget
        text_end => 0,    # 0-based index into file, of the file line 
+after the last line currently in text widget

    };
    bless $self, $class;
}

# requires a filename
# opens the file, indexes it and inserts the first page into the app's
+ text widget
sub openFile {
    my $self = shift;
    my $file = shift;
    if ( -f $file ) {
        $self->{fileindexed} = undef;                               # 
+closes the open file if any
        $self->{fileindexed} = FileIndexer->new( file => $file );
        $self->{text_begin}  = 0;                                   # 
+0-based
        $self->{text_end}    = 0;                                   # 
+0-based, after the last line gotten
        $self->{app}{text}->delete( '1.0', 'end' );
        $self->appendLines( $self->{text_maxlines} );
    }
    else {

        # TODO report error
    }
}

# returns the number of lines currently in the text widget
sub textlines {
    my $self = shift;
    return $self->{text_end} - $self->{text_begin};
}

# returns the number of file lines
sub filelines {
    my $self = shift;
    if ( defined $self->{fileindexed} ) {
        return $self->{fileindexed}->numLines();
    }
    else {
        return 0;
    }
}

# requires number of lines to delete starting at the first line in the
+ text widget
# deletes specified lines from the text widget
sub deleteFirstLines {
    my $self       = shift;
    my $chunklines = shift || $self->{text_pagelines};
    my $n          = min( $chunklines, $self->textlines() );
    $self->{app}{text}->delete( '1.0', "1.0 + $n lines" );
    $self->{text_begin} += $n;
    print STDERR "deleteFirstLines $n, now: $self->{text_begin}.,$self
+->{text_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

# requires number of lines to delete ending with the last line in the 
+text widget
# deletes specified lines from the text widget
sub deleteLastLines {
    my $self       = shift;
    my $chunklines = shift || $self->{text_pagelines};
    my $n          = min( $chunklines, $self->textlines() );
    my $from       = $self->textlines() - $n;
    $self->{app}{text}->delete( "$from.0", 'end' );
    $self->{text_end} -= $n;
    print STDERR "deleteLastLines $n, now: $self->{text_begin}.,$self-
+>{text_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

# requires number of lines to insert after the last line in the text w
+idget
# if needed deletes some first lines from the text widget
# inserts the lines
sub appendLines {
    my $self = shift;
    unless ( defined $self->{fileindexed} ) {
        print STDERR "no file opened\n";
        return;
    }
    my $chunklines = shift || $self->{text_pagelines};

    my $tailmargin = $self->filelines() - $self->{text_end};
    $chunklines = min( $chunklines, $tailmargin );
    $chunklines = min( $chunklines, $self->{text_maxlines} );
    print STDERR "appendLines chunklines $chunklines\n" if $self->{deb
+ug};

    if ( $self->textlines() + $chunklines > $self->{text_maxlines} ) {
        $self->deleteFirstLines($chunklines);
    }

    my $new_end   = $self->{text_end} + $chunklines;
    my $new_begin = $new_end - min( $chunklines, $self->{text_maxlines
+} );

    my @lines = $self->{fileindexed}->getLines( $new_begin, $new_end -
+ 1 );

    for (@lines) {
        $self->{app}{text}->insert( 'end', "$_\n" );
    }
    $self->{text_end} += @lines;
    my $n = @lines;
    print STDERR "appendLines $n, now: $self->{text_begin}.,$self->{te
+xt_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

# requires number of lines to prepend before the first line in the tex
+t widget
# if needed deletes some last lines from the text widget
# inserts the lines
sub prependLines {
    my $self = shift;
    unless ( defined $self->{fileindexed} ) {
        print STDERR "no file opened\n";
        return;
    }
    my $chunklines = shift || $self->{text_pagelines};
    $chunklines = min( $chunklines, $self->{text_begin} );
    $chunklines = min( $chunklines, $self->{text_maxlines} );
    print STDERR "prependLines chunklines $chunklines\n" if $self->{de
+bug};

    if ( $self->textlines() + $chunklines > $self->{text_maxlines} ) {
        $self->deleteLastLines($chunklines);
    }

    my $new_begin = $self->{text_begin} - $chunklines;
    my $new_end   = $new_begin + min( $chunklines, $self->{text_maxlin
+es} );

    my @lines = $self->{fileindexed}->getLines( $new_begin, $new_end -
+ 1 );
    for ( my $i = $#lines ; $i >= 0 ; --$i ) {
        $self->{app}{text}->insert( '1.0', "$lines[$i]\n" );
    }
    $self->{text_begin} -= @lines;
    my $n = @lines;
    print STDERR "prependLines $n, now: $self->{text_begin}.,$self->{t
+ext_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

sub replaceLines {
    my $self = shift;
    my $from = shift || 0;
    my $upto = shift || $self->filelines();    # not including the las
+t one

    # clip to maxlines preserving $chunklines if possible
    my $chunklines = $upto - $from;
    $chunklines = max( 0, $chunklines );
    $chunklines = min( $chunklines, $self->{text_maxlines} );
    $from = clip( 0, $from, $self->filelines() - $self->{text_maxlines
+} - 1 );

    # for now ignore any overlap
    $self->deleteFirstLines( $self->textlines() );

    $self->{text_begin} = $from;
    my @lines = $self->{fileindexed}->getLines( $self->{text_begin}, $
+self->{text_begin} + $chunklines - 1 );
    for (@lines) {
        $self->{app}{text}->insert( 'end', "$_\n" );
    }
    $self->{text_end} = $self->{text_begin} + @lines;
    my $n = @lines;
    print STDERR "replaceLines $n, now: $self->{text_begin}.,$self->{t
+ext_end}\n" if $self->{debug};

    $self->updateMessageAndScrollbar2();
}

# updates the message in label and the second scrollbar
sub updateMessageAndScrollbar2 {
    my $self    = shift;
    my $message = shift;
    if ( defined $message ) {
        ${ $self->{app}{messageText} } = $message;
    }
    else {
        my $filelines = 0;
        if ( defined $self->{fileindexed} ) {
            $filelines = $self->{fileindexed}->numLines();
        }
        my $text_last = $self->{text_end} - 1;
        my $textlines = $self->textlines();
        ${ $self->{app}{messageText} } = "$self->{text_begin} .. $text
+_last ($textlines of $filelines)";
    }

    # update scr2
    my $lines = $self->{text_end} - $self->{text_begin};
    if ( $lines > $self->{text_pagelines} ) {
        my $num = $self->{fileindexed}->numLines();
        if ( $num > 0 ) {
            my $fr1 = $self->{text_begin} / $num;
            my $fr2 = $self->{text_end} / $num;
            $self->{app}{scr2}->set( $fr1, $fr2 );
        }
    }
}

1;

######################################################################
+############

package main;

use Tk;
use Tk::ROText;
use Tk::DialogBox;

#use Data::Dumper;

our $VERSION = 0.11;

# create the main window, a menu frame at top, a second scrollbar at r
+ight and a scrolled ROText
my $app;
$app->{mw} = MainWindow->new( -title => "hugepad" );
$app->{menu} = $app->{mw}->Frame()->pack( -side     => "top",   -fill 
+=> "x" );
$app->{scr2} = $app->{mw}->Scrollbar()->pack( -side => 'right', -fill 
+=> 'y' );
$app->{scr2}->configure( -command => \&OnScroll2, -activerelief => 'gr
+oove' );
$app->{text} =
  $app->{mw}->Scrolled( "ROText", -scrollbars => 'se', -wrap => 'none'
+ )->pack( -fill => 'both', -expand => 1 );

# add menus and dialogs
{
    $app->{menu}{file} =
      $app->{menu}->Menubutton( -text => 'File', -underline => 0, -tea
+roff => 0 )->pack( -side => 'left' );

    $app->{menu}{file}->command(
        -label   => 'Open',
        -command => sub {
            $app->{text}->delete( '1.0', 'end' );

            my $types = [ [ 'Text Files', '.txt' ], [ 'All Files', '*.
+*', ] ];
            my $file2open = $app->{mw}->getOpenFile( -filetypes => $ty
+pes );
            openFile($file2open);
        }
    );

    $app->{menu}{file}->separator;

    $app->{menu}{file}->command(
        -label   => 'Exit',
        -command => sub {
            exit(0);
        }
    );
}
{
    $app->{dlg}{about} = $app->{mw}->DialogBox( -title => "About", -bu
+ttons => ["OK"] );
    $app->{dlg}{about}->add( "Label", -text => "hugepad\na viewer for 
+huge text files\nby Rudif" )->pack;

    $app->{dlg}{help} = $app->{mw}->DialogBox( -title => "Help", -butt
+ons => ["OK"] );
    $app->{dlg}{help}
      ->add( "Label", -text => "Open a text file.\nScroll the text usi
+ng vertical arrow and page keys, mouse wheel and scrollbars." )->pack
+;

    $app->{menu}{about} =
      $app->{menu}->Menubutton( -text => 'Help', -underline => 0, -tea
+roff => 0 )->pack( -side => 'left' );

    $app->{menu}{about}->command( -label => 'About', -command => sub {
+ $app->{dlg}{about}->Show; } );
    $app->{menu}{about}->command( -label => 'Help',  -command => sub {
+ $app->{dlg}{help}->Show; } );
}
{
    my $messageText = "   ";
    $app->{messageLabel} =
      $app->{menu}->Label( -textvariable => \$messageText, -relief => 
+'sunken' )->pack( -side => 'right' );
    $app->{messageText} = \$messageText;
}

# add Inserter
my $ins = Inserter->new( app => $app );

# add bindings for MouseWheel and for the Y arrow keys
$app->{text}->bind( "<MouseWheel>", \&OnYscrolllimit );
$app->{text}->bind( "<Key-Up>",     \&OnYarrowlimit );
$app->{text}->bind( "<Key-Down>",   \&OnYarrowlimit );

# add bindings for PgDown and PgUp keys
$app->{text}->bind( "<Key-Next>",  [ sub { $ins->appendLines(); },  Ev
+('K') ] );
$app->{text}->bind( "<Key-Prior>", [ sub { $ins->prependLines(); }, Ev
+('K') ] );

# uncomment for testing only
#$app->{text}->bind( "<Shift-Key-Next>",  [ sub { $ins->appendLines();
+ }, Ev('K') ] );
#$app->{text}->bind( "<Shift-Key-Prior>", [ sub { $ins->prependLines()
+; }, Ev('K') ] );
#$app->{text}->bind( "<Key-Home>", [ sub { $ins->deleteFirstLines(); }
+, Ev('K') ] );
#$app->{text}->bind( "<Key-End>",  [ sub { $ins->deleteLastLines(); },
+  Ev('K') ] );
#$app->{text}->bind( "<Key-Home>", [ sub { $ins->replaceLines( 4000, 4
+400 ); }, Ev('K') ] );
#$app->{text}->bind( "<Key-End>",  [ sub { $ins->replaceLines( 8000, 8
+800 ); }, Ev('K') ] );

# redefine the scrollbar's callback that tells the Text to scroll
$app->{text}->Subwidget("yscrollbar")->configure( -command => \&scroll
+callback, );

# opens the file given as command line option
my $file = shift;
if ( defined $file ) {

    $ins->openFile($file);
}

# here we go
MainLoop;

### subs

# requires a filename
# opens the file in Indexer and loads the first maxlines into the Text
+ widget
sub openFile {
    my $file = shift;
    $ins->openFile($file);
}

# callback for the inner scrollbar
# scrolls the current contents of the Text widget
sub scrollcallback {

    # scrollbar tells Text widget to scroll or moveto
    $app->{text}->yview(@_);

    OnYscrolllimit();    # additional behavior
}

# on reaching the scroll limit loads another line into the Text widget
sub OnYscrolllimit {
    my ( $top, $bot ) = $app->{text}->yview;
    if ( $top == 0 ) {
        $ins->prependLines(1);    # wheel or scrollbar try to go above
+ the first line
    }
    elsif ( $bot == 1 ) {
        $ins->appendLines(1);     # wheel or scrollbar try to go below
+ the last line
    }
}

# on arrow cursor hitting the first or last line in Text widget loads 
+another line
sub OnYarrowlimit {
    my $i = int( $app->{text}->index('insert') );
    my $e = int( $app->{text}->index('end') );
    if ( $i == 1 ) {
        $ins->prependLines(1);    # up arrow hits the first line
    }
    elsif ( $i == $e - 1 ) {
        $ins->appendLines(1);     # down arrow hits the last line
    }
}

# requires a scroll command and a quantity to scroll to or scroll by
sub OnScroll2 {
    my ( $cmd, $qty ) = @_;
    if ( $cmd eq 'moveto' ) {

        # $qty (0..1) designates the relative position in file
        # of the first line to load into the text widget
        my $new_begin = int( $qty * $ins->filelines() );
        my $new_end   = $new_begin + $ins->{text_maxlines};
        $ins->replaceLines( $new_begin, $new_end );
    }
    elsif ( $cmd eq 'scroll' ) {

        #scroll by 1 page
        if ( $qty == -1 ) {
            $ins->prependLines();
        }
        elsif ( $qty == 1 ) {
            $ins->appendLines();
        }
    }
}

__END__


=head1 NAME

hugepad.pl - Perl/Tk program for viewing huge text files

=head1 SYNOPSIS

  hugepad [somehugefile.txt]

=head1 DESCRIPTION

hugepad opens a text file of up to several hundred megabytes 
for viewing in a Tk ROText window.

You can scroll the file contents using vertical arrow keys, mouse whee
+l (Windows only?), 
Page Up/Down keys and two vertical scrollbars, for fine and coarse (ra
+pid) scrolling.

It has no editing capability at this time.

=head1 VERSION

Preliminary. Comments and suggestions for improvement are welcome.

=head1 TODO

=over 4

=item * make pagesize dynamic ?

=item * goto line

=item * find

=item * filter lines with a regexp and save

=back

=head1 ACKNOWLEDGEMENTS

[BrowserUk] of perlmonks for the file indexing algorithm

[zentara] of perlmonks for advice on Tk widget method rebinding

Paul Malcher for kpad

Steve Hancock for perltidy

=head1 BUGS

No unit tests

Tested only on WinXP

Error handling is rough

=head1 FIXED BUGS

=over 4

=item initial dir in getOpenFile

I< The script has 'h:\' as initial dir, which seems silly for *NIX mac
+hines and even for most Win32 machines that have no h:\ 'share' mount
+ed.>

Removed the option -initialdir from getOpenFile() call.

=back



=head1 AUTHOR

Rudi Farkas, [Rudif] of perlmonks

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2005 by Rudi Farkas

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.4 or,
at your option, any later version of Perl 5 you may have available.


=cut
Replies are listed 'Best First'.
Re: hugepad
by zentara (Archbishop) on Apr 10, 2005 at 13:54 UTC
    Hi again, I was looking at this offline, and thought it would be nice if your had a "fontselect" dialog. The "Tk::FontDialog" works on Linux and Win32. Here are some additions,(which you might consider). I have it working on my linux machine, and you might want to check how changing fonts may affect your "line-numbers display".

    I didn't fully make the $font an object variable, you could do that. Anyways:

    package main; use Tk; use Tk::ROText; use Tk::DialogBox; use Tk::FontDialog; #use Data::Dumper; our $VERSION = 0.1; # create the main window, a menu frame at top, a second scrollbar at r +ight and a scrolled my $app; $app->{mw} = MainWindow->new( -title => "hugepad" ); $app->{menu} = $app->{mw}->Frame()->pack( -side => "top", -fill +=> "x" ); my $font; my $fd = $app->{mw}->FontDialog(-nicefont => 0, -title => 'Select Font', -applycmd => \&apply_font, ); $app->{fontbut} = $app->{menu}->Button(-text => 'Choose Font', -command => sub { $font = $fd->Show; apply_font($font); })->pack(-side => 'right');

    and

    sub apply_font { my $font = shift; if (defined $font) { $app->{fontbut}->configure(-font => $font); $app->{mw}->RefontTree(-font => $font); } }

    I'm not really a human, but I play one on earth. flash japh
Re: hugepad
by zentara (Archbishop) on Apr 10, 2005 at 11:23 UTC
    Very nice. I havn't thoroughly tested it, but it loads fast and works smooth. The only error on linux I see, is "Tk::Error: "h:\" is not a valid directory", when I try to open a file. You might want to post this comp.lang.perl.tk so the mainstream perlTk crowd can look at ( and enjoy) it.


    I'm not really a human, but I play one on earth. flash japh

      I've /msg'ed the author about that also. The script has 'h:\' as initial dir, which seems silly for *NIX machines and even for most Win32 machines that have no h:\ 'share' mounted.

      --
      b10m

      All code is usually tested, but rarely trusted.
        Oops, you are right. I was doubly shortsighted here. A case of Windows-blindness?

        I fixed this by simply removing the -initaldir option from getOpenFile() call. On my machine now the File Open dialog seems to default to the directory most recently used by the script.

        Does the File Open dialog now do something reasonable on *NIX machines?

        Rudif