I get this:
Software error:
Couldn't get it! at Noumena1.pl line 42.
when I try to run the script. It used to work. By 'work' I mean it used attempt to strip a webpage of everything but its punctuation (don't ask) leaving the webpage format unchanged
code here
#!/usr/bin/perl -wT
# /home/sites/www.in-vacua.com/web/cgi-bin/Noumena1.pl -w
use CGI::Carp qw(fatalsToBrowser);
use strict;
use CGI ':standard';
use lib '/.users/27/inv838/Template';
use LWP::Simple;
use HTML::Parser;
use vars qw($html);
my $content;
# Configurable variables for the script
my %templates = (text => "text_output.html",
url => "url_output.html");
# Initialise a new CGI object for parameter handling, etc.
my $q = CGI->new;
# Check to see if we have any input from the user. If so,
# we go to process it. If not, we'll return a blank form
if ($q->param('text')) {
my $text = &process_text($q->param('text'));
&output_template('text',$text);
} elsif ($q->param('url')) {
my $text = &process_url($q->param('url'));
} else {
print $q->redirect("/noumena.html");
}
## Subroutine Definitions
# process_url: strip non-punctuation from html docs (harder)
sub process_url {
my ($url) = @_;
my $content = get($url); #41
die "Couldn't get it!" unless defined $content;
# Slightly ugly kludging to sort out internal document links
# on sites that don't fully qualify (damn them all)
if (!($url =~ m!^http://!)) {
$url ="http://".$url;
}
$url =~ m!(http://(.*))/!;
my $baseurl=$1 || $url;
$content =~ s!href="/(.*)"!href="$baseurl/$1"!ig;
$content =~ s!rel="/(.*)"!rel="$baseurl/$1"!ig;
$content =~ s!src="/(.*)"!src="$baseurl/$1"!ig;
# HTML::Parser is slightly odd - it uses a callback interface which
+throws
# things back into this namespace.
HTML::Parser->new(api_version => 3,
handlers => [start => [\&_html_parser_tag,
+"text"],
end => [\&_html_parser_tag,
+"text"],
text => [\&_html_parser_text,
+ "dtext"]],
marked_sections => 1,)->parse($content);
print $q->header;
print $html;
}
# html_parser_text: handler to tell HTML::Parser what to do with text
+sections
sub _html_parser_text {
my ($text) = @_;
$text =~ s!\w! !g;
$html .= $text;
}
# html_parser_tag: handler to pass html tags unmolested back to HTML::
+Parser
sub _html_parser_tag {
my ($text) = @_;
$html .= $text;
}
# output_template: use Template Toolkit to return data to the user
sub output_template {
my ($type, $text) = @_;
print $q->header;
my $template = Template->new;
$template->process($templates{$type},
{text => $text})
|| die $template->error();
}