http://qs321.pair.com?node_id=157614
Category: Web Stuff
Author/Contact Info lshatzer
Description: Detects whether its argument is a URL, a file name, or a string of HTML, passes it to HTML::TokeParser accordingly, and returns the resulting HTML::TokeParser object. (This was my first venture into inheritance.)
Updated: Changed a few things from Amoe's suggestions.
package HTML::TokeParser::Smart;
use 5.006;    # compile-time version check; "our" below needs Perl 5.6
use strict;
use warnings;

use Carp;
use LWP;
use base 'HTML::TokeParser';
our $VERSION = '0.2';

=head1 NAME

HTML::TokeParser::Smart - HTML::TokeParser constructor that accepts a file,
a URL, or a string of HTML

=head1 SYNOPSIS

    use HTML::TokeParser::Smart;

    my $from_file = HTML::TokeParser::Smart->new('index.html');
    my $from_url  = HTML::TokeParser::Smart->new('http://www.example.com/');
    my $from_html = HTML::TokeParser::Smart->new('<p>Hello</p>');

=head1 METHODS

=head2 new

    my $parser = HTML::TokeParser::Smart->new($source);

Inspects C<$source> and hands the matching input to the parent
C<HTML::TokeParser> constructor. The checks are made in this order:

=over 4

=item 1.

C<$source> names an existing file system entry: it is parsed as a file.

=item 2.

C<$source> starts with an C<http:>, C<https:>, C<ftp:> or C<file:> scheme:
it is fetched with C<LWP::UserAgent> and the response body is parsed.

=item 3.

C<$source> contains something that looks like a tag (C<< <...> >>): the
string itself is parsed as HTML.

=back

Returns the parser object, blessed into the invoking class. Croaks when
C<$source> is undefined, matches none of the cases above, cannot be
fetched, or cannot be opened by the parent constructor.

=cut

sub new {
    my $proto  = shift;
    my $class  = ref($proto) || $proto;
    my $source = shift;

    croak 'Usage: ' . __PACKAGE__ . '->new($url_file_or_html)'
        unless defined $source;

    # A block of HTML usually ends in a newline; a failed file test on such
    # a string can emit "Unsuccessful stat on filename containing newline",
    # so that warning category is switched off for this one check.
    my $is_existing_path = do {
        no warnings 'newline';
        -e $source;
    };

    # What gets handed to HTML::TokeParser: a plain string is treated as a
    # file name, a scalar reference as the document text itself.
    my $parser_input;

    if ($is_existing_path) {
        # It's a file!
        $parser_input = $source;
    }
    elsif ($source =~ m{\A(?:https?|ftp|file):}i) {
        # It's a URL!  The scheme must be followed by a colon so that a
        # non-existent "filename.html" is not mistaken for a file: URL.
        my $browser  = LWP::UserAgent->new;
        my $response = $browser->request(HTTP::Request->new(GET => $source));
        croak "Unable to get webpage: $source ", $response->status_line
            unless $response->is_success;
        $parser_input = $response->content_ref;
    }
    elsif ($source =~ m/<[^>]+>/) {
        # It's HTML!
        $parser_input = \$source;
    }
    else {
        croak "'$source' is neither a valid URL, file, or HTML.";
    }

    # Let the parent constructor bless straight into $class instead of
    # building an HTML::TokeParser and re-blessing it afterwards.
    my $self = $class->SUPER::new($parser_input);

    # The parent returns nothing when it cannot open a file (for example a
    # directory or an unreadable path); report that rather than letting a
    # later method call fail on an undefined value.
    # NOTE(review): $! is expected to carry the open() error here - confirm
    # against the installed HTML::TokeParser version.
    croak "Unable to create parser for '$source': $!"
        unless defined $self;

    return $self;
}

1;