In my rush for avatar-hood I forget to include the code!!
:-D forgive my impulsiveness... cut away doctors
I've modifed another program to do the stuff I want...
#!/usr/bin/perl -w
use strict;
package GetUrls;
use vars qw(@ISA);
@ISA = qw(HTML::Parser);
require HTML::Parser;
use strict;
my $parser = new GetUrls;
$parser->parse_file('C:\eechoice\pmp-forms-english.htm');
for (keys %{$parser->{URLS}})
{
print "$_ $parser->{URLS}{$_}\n";
}
sub start
{
my($self,$tag,$attr,$attrseq,$orig) = @_;
################### TAGS TO KEEP #########################
if ( $tag eq 'li') {
$self->{got_li}++;
print "<$tag>";
}
if ( $tag eq 'table') {
$self->{got_table}++;
print "<$tag>";
}
if ( $tag eq 'tr') {
$self->{got_tr}++;
print "<$tag>";
}
if ( $tag eq 'td') {
$self->{got_td}++;
print "<$tag>";
}
if ( $tag eq 'th') {
$self->{got_th}++;
print "<$tag>";
}
if ( $tag eq 'span') {
$self->{got_span}++;
print "<$tag>";
}
if ( $tag eq 'ul') {
$self->{got_ul}++;
print "<$tag>";
}
#################### TAGS TO STRIP ########################
if ( $tag eq 'a') {
if ($self->{cur_url} = $attr->{href}) {
$self->{got_href}++; }
}
if ( $tag eq 'p') {
$self->{got_p}++; }
if ( $tag eq 'body') {
$self->{got_body}++; }
if ( $tag eq 'meta') {
$self->{got_meta}++; }
if ( $tag eq 'html') {
$self->{got_html}++; }
if ( $tag eq 'head') {
$self->{got_head}++; }
if ( $tag eq 'title') {
$self->{got_title}++; }
if ( $tag eq 'br') {
$self->{got_br}++;
print "\n"}
if ( $tag eq 'font') {
$self->{got_font}++; }
if ( $tag eq 'form') {
$self->{got_form}++; }
if ( $tag eq 'input') {
$self->{got_input}++; }
}
sub end
{
my ($self,$tag) = @_;
################### TAGS TO KEEP #########################
if ($tag eq 'li' && $self->{got_li} ) {$self->{got_li}--; print "</$
+tag>";}
if ($tag eq 'table' && $self->{got_table} ) {$self->{got_table}--; p
+rint "</$tag>";}
if ($tag eq 'tr' && $self->{got_tr} ) {$self->{got_tr}--; print "</$
+tag>";}
if ($tag eq 'td' && $self->{got_td} ) {$self->{got_td}--; print "</$
+tag>";}
if ($tag eq 'th' && $self->{got_th} ) {$self->{got_th}--; print "</$
+tag>";}
if ($tag eq 'span' && $self->{got_span} ) {$self->{got_span}--; prin
+t "</$tag>";}
if ($tag eq 'ul' && $self->{got_ul} ) {$self->{got_ul}--; print "</$
+tag>";}
################### TAGS TO STRIP ########################
if ($tag eq 'a' && $self->{got_href} ) {$self->{got_href}--; }
if ($tag eq 'body' && $self->{got_body} ) {$self->{got_body}--; }
if ($tag eq 'p' && $self->{got_p} ) {$self->{got_p}--; }
if ($tag eq 'meta' && $self->{got_meta} ) {$self->{got_meta}--; }
if ($tag eq 'html' && $self->{got_html} ) {$self->{got_html}--; }
if ($tag eq 'head' && $self->{got_head} ) {$self->{got_head}--; }
if ($tag eq 'title' && $self->{got_title} ) {$self->{got_title}--; }
if ($tag eq 'br' && $self->{got_br} ) {$self->{got_br}--; }
if ($tag eq 'font' && $self->{got_font} ) {$self->{got_font}--; }
if ($tag eq 'form' && $self->{got_form} ) {$self->{got_form}--; }
if ($tag eq 'input' && $self->{got_input} ) {$self->{got_input}--; }
+
}
sub text
{
my ($self,$text ) = @_;
######################### KEEP ALL TEXT ########################
if ($self->{got_href} ) {
print $text;
# $self->{URLS}{$self->{cur_url}} .= $text;
}
# if ($self->{got_body} ) {
# print $text;
# }
if ($self->{got_li} ) {
print $text;
}
if ($self->{got_p} ) {
print $text;
}
if ($self->{got_span} ) {
print $text;
}
if ($self->{got_ul} ) {
print $text;
}
# if ($self->{got_html} ) {
# print $text;
# }
if ($self->{got_title} ) {
print "<TITLE>$text</TITLE>\n";
}
if ($self->{got_font} ) {
print "$text";
}
###################### EXCEPT ##########################
# meta, head, br, form, input
}
|