Beefy Boxes and Bandwidth Generously Provided by pair Networks
laziness, impatience, and hubris
 
PerlMonks  

Gtk+ HTML Tree Viewer

by mdillon (Priest)
on Sep 20, 2000 at 21:17 UTC ( [id://33351]=sourcecode: print w/replies, xml ) Need Help??
Category: HTML Utility/GUI Programming
Author/Contact Info mike dillon <mdillon@standmed.com>
Description:

this is a rewrite of a utility i did for a job where i was using HTML::TreeBuilder and XML::XPath to parse and search normal HTML documents using the powerful XPath query language.

this utility uses HTML::TreeBuilder to parse an HTML document from a URL specified on the command line or from an internal browser location line and displays it as a Gtk+ Tree in a window. only subtrees with text nodes or anchors are expanded.

there are (simple) XPath queries displayed in the status bar that could be used to extract that node from the document (for example, by converting it to XHTML with HTML::TreeBuilder and then using XML::XPath, or by traversing the TreeBuilder parse tree and programmatically constructing an XPath parse tree).

it's probably not a bad example of simple Gtk+ GUI programming. more may be yet to come in the way of functionality (and comments).

this was written and tested against Gtk 0.7003.

there is support for using GtkHTML as well, if your installation is functional (mine was partially functional when i wrote the code, but stopped working after i upgraded from GtkHTML 0.4 to 0.6.1 and recompiled Gtk::HTML)

most recently updated: 24 Sep 2000

#!/usr/bin/perl -w

use Data::Dumper;

use Gtk;

#use Gtk::HTML;

use HTML::TreeBuilder;

use HTTP::Request::Common;

use LWP::UserAgent;

use Tie::RefHash;

use URI;

use strict;

init Gtk;
#init Gtk::Gdk::Rgb;
#init Gtk::HTML;

##
##
##

my $TITLE = 'gHTMLTree';
my $false = 0;
my $true = 1;

my $show_tooltips = $false;

##
##
##

my $tooltips;
my $filedialog;
my $window;
my $menubar;
my $pane;
my $vbox;
my $combo;
my $tree_scroller;
my $tree;
my $notebook;
my $raw_text_scroller;
my $raw_text;
my $text_scroller;
my $text;
my $attrlist_scroller;
my $attrlist;
my $html_scroller;
my $html;
my $status;
my $status_context;

##
##
##

my $p;
my $ua = new LWP::UserAgent();

##
##
##

#Gtk::Widget->set_default_colormap(Gtk::Gdk::Rgb->get_cmap());
#Gtk::Widget->set_default_visual(Gtk::Gdk::Rgb->get_visual());

$tooltips = new Gtk::Tooltips();

$window = new Gtk::Window('toplevel');
$window->set_usize(800, 600);
$window->set_title($TITLE);
$window->signal_connect(delete_event => sub { Gtk->exit(0) });

$vbox = new Gtk::VBox($false, 5);
$window->add($vbox);
$vbox->show();

{
    my @menuitems = do {
        my $dummy = new Gtk::Entry();
        $dummy->set_usize(0, 0);
        $vbox->pack_start($dummy, $false, $false, 0);

        ( {
            path => '/_File',
            type => '<Branch>',
        },
        {
            path => '/File/_Open Location...',
            accelerator => '<control>O',
            callback => sub {
                $filedialog->show();
            },
        },
        {
            path => '/File/_Quit',
            accelerator => '<control>Q',
            callback => sub { Gtk->exit(0) },
        },
        {
            path => '/_Edit',
            type => '<Branch>',
        },
        {
            path => '/Edit/_Copy',
            accelerator => '<control>C',
            callback => sub {
                if ($combo->entry->has_focus())
                {
                    $combo->entry->copy_clipboard();
                }
                elsif ($text->has_focus())
                {
                    $text->copy_clipboard();
                }
                elsif ($raw_text->has_focus())
                {
                    $raw_text->copy_clipboard();
                }
                elsif ($tree->selection() && $tree->selection()->has_f
+ocus())
                {
                    $dummy->set_text(${$tree->selection()->get_user_da
+ta()}[0]);
                    $dummy->select_region(0, length($dummy->get_text()
+));
                    $dummy->copy_clipboard();
                }
            },
        }, );
    };

    my $factory;

    my $accel = new Gtk::AccelGroup();

    $factory = new Gtk::ItemFactory('Gtk::MenuBar', '<main>', $accel);

    $factory->create_items(@menuitems);

    $window->add_accel_group($accel);

    $menubar = $factory->get_widget('<main>');
}
$vbox->pack_start($menubar, $false, $true, 0);
$menubar->show();

$combo = new Gtk::Combo();
$vbox->pack_start($combo, $false, $false, 5);
$combo->disable_activate();
$combo->set_use_arrows($true);
$combo->set_use_arrows_always($true);
$combo->set_case_sensitive($false);
$combo->entry->signal_connect(activate => \&change_location);
$combo->list->set_selection_mode('single');
$combo->list->signal_connect(selection_changed => sub {
    $combo->entry->select_region(0, length($combo->entry->get_text));
    $combo->grab_focus();
});
$combo->show();

$pane = new Gtk::HPaned();
$vbox->pack_start($pane, $true, $true, 0);
$pane->set_handle_size(10);
$pane->set_gutter_size(8);
$pane->show();

$tree_scroller = new Gtk::ScrolledWindow();
$pane->add1($tree_scroller);
$tree_scroller->set_usize(300, 500);
$tree_scroller->set_policy('automatic', 'always');
$tree_scroller->show();

$tree = new Gtk::Tree();
$tree_scroller->add_with_viewport( $tree );
$tree->set_selection_mode('browse');
$tree->set_view_mode('line');
$tree->show();

$notebook = new Gtk::Notebook();
$pane->add2($notebook);
$notebook->set_tab_pos('bottom');
$notebook->show();

$text_scroller = new Gtk::ScrolledWindow();
$notebook->append_page($text_scroller, new Gtk::Label('HTML Source (Pr
+ocessed)'));
$text_scroller->set_policy('automatic', 'automatic');
$text_scroller->show();

$text = new Gtk::Text();
$text_scroller->add($text);
$text->set_editable($false);
$text->realize();
$text->show();

$raw_text_scroller = new Gtk::ScrolledWindow();
$notebook->append_page($raw_text_scroller, new Gtk::Label('HTML Source
+ (Raw)'));
$raw_text_scroller->set_policy('automatic', 'automatic');
$raw_text_scroller->show();

$raw_text = new Gtk::Text();
$raw_text_scroller->add($raw_text);
$raw_text->set_editable($false);
$raw_text->realize();
$raw_text->show();

$attrlist_scroller = new Gtk::ScrolledWindow();
$notebook->append_page($attrlist_scroller, new Gtk::Label('Node Attrib
+utes'));
$attrlist_scroller->set_policy('automatic', 'always');
$attrlist_scroller->show();

{
    my @titles = qw(Name Value);
    $attrlist = new_with_titles Gtk::CList(@titles);
    $attrlist_scroller->add($attrlist);
    $attrlist->set_column_width(0, 75);
    $attrlist->set_column_auto_resize($_, $true) for (0 .. $#titles);
    $attrlist->set_selection_mode('single');
    $attrlist->show();
}

#$html_scroller = new Gtk::ScrolledWindow();
#$notebook->append_page($html_scroller, new Gtk::Label('HTML Viewer'))
+;
#$html_scroller->set_policy('automatic', 'automatic');
#$html_scroller->show();

#$html = new Gtk::HTML();
#$html_scroller->add($html);
#$html->set_editable($false);
#$html->realize();
#$html->show();

$status = new Gtk::Statusbar();
$vbox->pack_end($status, $false, $false, 0);
$status_context = $status->get_context_id($TITLE);
$status->show();

$filedialog = new Gtk::FileSelection(qq{$TITLE: File Browser});
$filedialog->set_modal($true);
$filedialog->ok_button->signal_connect(clicked => sub {
    $filedialog->hide();
    change_location($combo->entry, 'file:'.$filedialog->get_filename()
+);
});
$filedialog->cancel_button->signal_connect(clicked => sub { $filedialo
+g->hide() });

unless (fork)
{
    $window->show();
    change_location($combo->entry, shift) if @ARGV;
    main Gtk;
    exit(0);
}

Gtk->_exit(0);

##
##
##

{
    my $node_message;
    my $popup_node;
    my $attrlist_handle;

    sub select_node
    {
        my ($widget) = @_;

        my @data= @{$widget->get_user_data};

        $status->remove($status_context, $node_message) if defined $no
+de_message;

        $node_message = $status->push($status_context, $data[0]);

        if (@data >= 2)
        {
            $attrlist->clear() if $attrlist->rows();

            my @attrs = @{$data[1]};

            while (@attrs and @attrs % 2 == 0)
            {
                my ($key, $value) = splice(@attrs, 0, 2);

                $attrlist->append($key, $value);
            }

            Gtk->main_iteration while (Gtk->events_pending);
        }

        return 1;
    }
}

sub change_location
{
    my ($widget, @data) = @_;

    my $url = @data ? shift(@data) : $widget->get_text;

    $url = "file:$url" unless $url =~ /^[a-z_0-9-]+:/i;

    $p->delete() if defined $p;

    $p = new HTML::TreeBuilder;
    $p->ignore_unknown($false);
    $p->store_comments($true);

    load_url($url);
}

{
    my %history;

    sub load_url
    {
        my $url = shift;

        $window->set_title($TITLE);

        $combo->entry->set_text($url);

        $history{$url} = time();

        $combo->set_popdown_strings(sort
            { $history{$b} <=> $history{$a} } keys %history);

        my $children = $tree->children;
        $tree->clear_items(0, $children - 1) if $children;

        $text->set_point(0);
        $text->forward_delete($text->get_length);

        $raw_text->set_point(0);
        $raw_text->forward_delete($raw_text->get_length);

        Gtk->main_iteration while (Gtk->events_pending);

        my $fetch_msg = $status->push($status_context, "Fetching: $url
+ - 0 kB");

        Gtk->main_iteration while (Gtk->events_pending);

        my $req = GET $url;

        #my $loader = $html->signal_connect('url_requested', get_html_
+resource_loader($url));

        #my $handle = $html->begin();

        my $handle_chunk = do {
            my $last_message;
            my $received = 0;

            sub
            {
                my ($data, $response, $proto) = @_;

                my $message;

                if ($data)
                {
                    $received += length $data;

                    $raw_text->freeze();
                    $raw_text->set_point($raw_text->get_length());
                    $raw_text->insert(undef, $raw_text->style->black, 
+undef, $data);
                    $raw_text->set_position(0);
                    $raw_text->thaw();

                    Gtk->main_iteration while (Gtk->events_pending);

                    #$html->write($handle, $data);

                    #Gtk->main_iteration while (Gtk->events_pending);

                    $p->parse($data);

                    $message = $status->push($status_context,
                        sprintf("Fetching: %s - %0.1f", $url,
                        $received / 1024).'kB');
                }

                $status->remove($status_context, $last_message)
                    if defined $last_message;

                $last_message = $message;

                Gtk->main_iteration while (Gtk->events_pending);
            };
        };

        my $res = $ua->request($req, $handle_chunk, 128);

        #$html->signal_disconnect($loader);

        # clear out the status bar
        $handle_chunk->();

        if ($res->is_success)
        {
            my $build_msg = $status->push($status_context, "Building G
+TK tree");

            $status->remove($status_context, $fetch_msg);

            Gtk->main_iteration while (Gtk->events_pending);

            build_html_tree($p, $tree);

            $status->remove($status_context, $build_msg);

            Gtk->main_iteration while (Gtk->events_pending);

            $p->eof();
            #$html->end($handle, 'ok');

            $text->set_point($text->get_length());
            $text->insert(undef, $text->style->black, undef, $p->as_HT
+ML(undef, "\t", {}));
            $text->set_position(0);

            Gtk->main_iteration while (Gtk->events_pending);
        }
        else
        {
            $status->remove($status_context, $fetch_msg);

            $p->eof();
            #$html->end($handle, 'error');

            error_dialog(qq{Failed fetching "$url"});
        }

    }
}

sub get_html_resource_loader
{
    my $url = shift;

    $url =~ s{(?<=/)[^/]*$}//;

    my $base_uri = new URI($url);

    sub
    {
        my ($html, $url, $handle) = @_;

        my $uri = new_abs URI($url, $base_uri);

        $url = $uri->as_string;

        my $req = GET $url;

        my $res = $ua->request($req, sub
            {
                local $_ = shift;
                my ($response, $proto) = @_;

                Gtk->main_iteration while (Gtk->events_pending);

                if (defined and length)
                {
                    $html->write($handle, $_);
                }
                else
                {
                    die("Error fetching $url$/");
                }
            }, 8192);

        if ($res->is_success)
        {
            $html->end($handle, 'ok');
        }
        else
        {
            warn "Error fetching $url$/";
            $html->end($handle, 'error');
        }
    };
}

sub error_dialog
{
    my ($message) = @_;

    my $dialog = new Gtk::Dialog();
    $dialog->set_modal($true);

    my $label = new Gtk::Label($message);
    $dialog->vbox->pack_start($label, $true, $true, 40);
    $label->show();

    my $button = new Gtk::Button("Close");
    $dialog->action_area->pack_start($button, $true, $true, 0);
    $button->signal_connect(clicked => sub { $dialog->hide; $dialog = 
+undef; 1; });
    $button->show();

    $dialog->show();
}

sub build_html_tree
{
    my $node = shift;
    my $branch = shift;
    my $path = shift;
    my $index = shift || 1;
    my $pos = shift || 0;

    $path .= '/';
    $path .= ref $node ? ($node->tag eq '~comment'
        ? 'comment()' : $node->tag) : 'text()';
    $path .= "[$index]";

    my $leaf;
    my $expand = 0; 

    if (ref $node)
    {
        $expand = 1 if $node->tag eq 'a';

        my $label = $node->tag;

        if ($node->tag eq 'link'
            and defined(my $rel = $node->attr('rel')))
        {
            $label .= " ($rel)";
        }

        if (defined (my $href = $node->attr('href')))
        {
            $label .= ": $href";
        }
        elsif (defined (my $src = $node->attr('src')))
        {
            $label .= ": $src";
        }

        if ($node->tag eq '~comment')
        {
            my $text = create_label($node->attr('text'));

            $label = qq{<!-- $text -->};
        }

        $leaf = new Gtk::TreeItem($label);
        $leaf->set_user_data([ $path, [ $node->all_external_attr() ] ]
+);
        $branch->append($leaf);

        if (my @kids = $node->content_list)
        {
            my $subtree = new Gtk::Tree();
            $leaf->set_subtree($subtree);

            my %child_types;
            for my $child (@kids)
            {
                my $tag;

                if (ref $child)
                {
                    $tag = $child->tag;
                }
                else
                {
                    $tag = "";

                    if ($node->tag eq 'title')
                    {
                        $window->set_title("$TITLE: $child");
                    }
                }

                $expand = build_html_tree($child, $subtree, $path,
                    ++$child_types{$tag}) || $expand;
            }

            $leaf->expand() if $expand;
        }
    }
    else
    {
        my $label = create_label($node);

        $leaf = new Gtk::TreeItem(qq{"$label"});
        $leaf->set_user_data([ $path, [] ]);
        $branch->append($leaf);

        $expand = 1;
    }

    $leaf->show();
    $leaf->signal_connect('select', \&select_node);
    $tooltips->set_tip($leaf, $path, "") if $show_tooltips;

    return $expand;
}

sub create_label
{
    my $label = shift || '';

    $label =~ s/\s+/ /g;
    $label = substr($label, 0, 97).'...'
        unless length($label) < 100;    

    $label;
}

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://33351]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others goofing around in the Monastery: (3)
As of 2024-04-19 02:25 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found