package B::Deobfuscate;
use strict;
use warnings;
use vars '$VERSION';
use base 'B::Deparse';
use B ();
use B::Keywords ();
# Some functions may require() YAML
$VERSION = '0.03';
sub load_keywords {
my $self = shift;
my $p = $self->{+__PACKAGE__};
return $p->{'keywords'} = {
map { $_, undef }
@B::Keywords::Barewords,
# Snip the sigils.
map(substr($_,1), @B::Keywords::Symbols) };
}
sub load_unknown_dict {
my $self = shift;
my $p = $self->{+__PACKAGE__};
my $dict_file = $p->{'unknown_dict_file'};
length $dict_file or return;
my $dict_data;
# slurp the entire dictionary at once
open DICT, '<', $dict_file
or die "Cannot open dictionary $dict_file: $!";
read DICT, $dict_data, -s DICT;
close DICT or die "Cannot close $dict_file: $!";
my $k = $self->load_keywords;
$p->{'unknown_dict_data'} =
[ sort { length $a <=> length $b or $a cmp $b }
grep { ! /\W/ and ! exists $k->{$_} }
split /\n/, $dict_data ];
}
sub next_short_dict_symbol {
my $self = shift;
my $p = $self->{+__PACKAGE__};
my $sym = shift @{ $p->{'unknown_dict_data'} };
push @{ $p->{'used_symbols'} }, $sym.
return $sym;
}
sub next_long_dict_symbol {
my $self = shift;
my $p = $self->{+__PACKAGE__};
my $sym = pop @{ $p->{'unknown_dict_data'} };
push @{ $p->{'used_symbols'} }, $sym;
return $sym;
}
sub load_user_config {
my $self = shift;
my $p = $self->{+__PACKAGE__};
my $config_file = $p->{'user_config'};
defined $config_file and length $config_file or return;
-f $config_file or die "Configuration file $config_file doesn't ex
+ist";
require YAML;
my $config = (YAML::LoadFile( $config_file ))[0];
$p->{'globals_to_ignore'} = $config->{'globals_to_ignore'};
$p->{'pad_symbols'} = $config->{'lexicals'};
$p->{'gv_symbols'} = $config->{'globals'};
defined $config->{'dictionary'} and
$p->{'unknown_dict_file'} = $config->{'dictionary'};
if (defined $config->{'global_regex'}) {
my $r = $config->{'global_regex'};
$p->{'global_regex'} = qr/$r/;
}
# Symbols that are listed with an undef value actually
# just aren't renamed at all.
for my $symt_nym (qw/pad gv/) {
my $symt = $p->{"${symt_nym}_symbols"};
for my $symt_key (keys %$symt) {
not defined $symt->{$symt_key} and
$symt->{$symt_key} = $symt_key;
}
}
}
sub gv_should_be_renamed {
my $self = shift;
my $name = shift;
my $p = $self->{+__PACKAGE__};
my $k = $p->{'keywords'};
# Ignore keywords
return if exists $k->{$name} or
$name =~ m{\A[[:digit:]]\z};
if (exists $p->{'gv_symbols'}{$name} or
$name =~ $p->{'gv_match'} ) {
return 1;
}
return;
}
sub rename_pad {
my $self = shift;
my $p = $self->{+__PACKAGE__};
my $name = shift;
$name =~ m{\A(\W+)} or die "Invalid pad variable name $name";
my $sigil = $1;
my $dict = $p->{'pad_symbols'};
return $dict->{$name} if exists $dict->{$name};
$dict->{$name} = $name;
return $dict->{$name} = lc $sigil . $self->next_short_dict_symbol;
}
sub rename_gv {
my $self = shift;
my $name = shift;
my $p = $self->{+__PACKAGE__};
return $name unless $self->gv_should_be_renamed( $name );
my $dict = $p->{'gv_symbols'};
return $dict->{$name} if exists $dict->{$name};
return $dict->{$name} = ucfirst $self->next_long_dict_symbol;
}
sub new {
my $class = shift;
my $self = $class->SUPER::new( @_ );
my $p = $self->{+__PACKAGE__} = {};
$p->{'unknown_dict_file'} = '/usr/share/dict/stop';
$p->{'unknown_dict_data'} = undef;
$p->{'user_config'} = undef;
$p->{'gv_match'} = qw/\A[[:lower:][:digit:]_]+\z/;
$p->{'pad_symbols'} = {};
$p->{'gv_symbols'} = {};
$p->{'output_yaml'} = 0;
while (my $arg = shift @_) {
if ($arg =~ m{\A-d([^,]+)}) {
$p->{'unknown_dict_file'} = $1;
} elsif ($arg =~ m{\A-c([^,]+)} ) {
$p->{'user_config'} = $1;
} elsif ($arg =~ m{\A-m/([^/]+)/} ) {
$p->{'gv_match'} = $1;
} elsif ($arg =~ m{\A-y}) {
$p->{'output_yaml'} = 1;
}
}
$self->load_user_config;
$self->load_unknown_dict;
return $self;
}
sub compile {
my(@args) = @_;
return sub {
my $source = '';
my $self = B::Deobfuscate->new(@args);
$self->stash_subs("main");
$self->{'curcv'} = B::main_cv;
$self->walk_sub(B::main_cv, B::main_start);
$source .= join '', $self->print_protos;
@{$self->{'subs_todo'}} =
sort {$a->[0] <=> $b->[0]} @{$self->{'subs_todo'}};
$source .= join '', $self->indent($self->deparse(B::main_root,
+ 0)), "\n"
unless B::Deparse::null B::main_root ;
my @text;
while (scalar(@{$self->{'subs_todo'}})) {
push @text, $self->next_todo;
}
$source .= join '', $self->indent(join("", @text)), "\n" if @t
+ext;
my $p = $self->{+__PACKAGE__};
my %dump = ( lexicals => $p->{'pad_symbols'},
globals => $p->{'gv_symbols'},
dictionary => $p->{'unknown_dict_file'},
global_regex => $p->{'gv_match'} );
if ($p->{'output_yaml'}) {
require YAML;
print YAML::Dump(\%dump, $source);
}
else {
print $source;
}
}
}
sub padname {
my $self = shift;
my $padname = $self->SUPER::padname( @_ );
return $self->rename_pad( $padname );
}
sub gv_name {
my $self = shift;
my $gv_name = $self->SUPER::gv_name( @_ );
return $self->rename_gv( $gv_name );
}
1;
__END__
=head1 NAME
B::Deobfuscate - Extension to B::Deparse for use in de-obfuscating sou
+rce code
=head1 SYNOPSIS
perl -MO=Deobfuscate,-csynthetic.yml,-y synthetic.pl
=head1 DESCRIPTION
B::Deobfuscate is a backend module for the Perl compiler that generate
+s perl
source code, based on the internal compiled structure that perl itself
creates after parsing a program. It adds symbol renaming functions to
+the
B::Deparse module. An obfuscated program is already parsed and interpr
+eted
correctly by the B::Deparse program. Unfortunately, if the obfuscation
involved variable renaming then the resulting program also has obfusca
+ted
symbols.
This module takes the last step and fixes names like $z5223ed336 to be
+ a word
from a dictionary. While the name still isn't meaningful it is at leas
+t easier
to distinguish and read. Here are two examples - one from B::Deparse a
+nd one
from B::Deobfuscate.
After B::Deparse:
if (@z6a703c020a) {
(my($z5a5fa8125d, $zcc158ad3e0) = File::Temp::tempfile('UNLINK',
+ 1));
print($z5a5fa8125d "=over 8\n\n");
(print($z5a5fa8125d @z6a703c020a) or die((((q[Can't print ] . $z
+cc158ad3e0) . ': ') . $!)));
print($z5a5fa8125d "=back\n");
(close(*$z5a5fa8125d) or die((((q[Can't close ] . *$za5fa8125d)
+. ': ' . $!)));
(@z8374cc586e = $zcc158ad3e0);
($z9e5935eea4 = 1);
}
After B::Deobfuscate:
if (@parenthesises) {
(my($scrupulousity, $postprocesser) = File::Temp::tempfile('UNLI
+NK', 1));
print($scrupulousity "=over 8\n\n");
(print($scrupulousity @parenthesises) or die((((q[Can't print ]
+. $postprocesser) . ': ') . $!)));
print($scrupulousity "=back\n");
(close(*$scrupulousity) or die((((q[Can't close ] . *$postproces
+ser) . ': ') . $!)));
(@interruptable = $postprocesser);
($propagandaist = 1);
}
You'll note that the only real difference is that instead of variable
+names
like $z9e5935eea4 you get $propagandist.
Please note that this module is mainly new and untested code and is
still under development, so it may change in the future.
=head1 OPTIONS
As with all compiler backend options, these must follow directly after
the '-MO=Deobfuscate', separated by a comma but not any white space.
All options defined in B::Deparse are supported here - see the B::Depa
+rse
documentation page to see what options are provided and how to use the
+m.
=over 4
=item B<-d>I<DICTIONARY>
Normally B::Deobfuscate reads the dictionary file at /usr/share/dict/s
+top. If
you would like to specify a different dictionary follow the -d paramet
+er with
the path the file. The path may not have commas in it and only lines i
+n the
dictionary that do not match /\W/ will be used. The entire dictionary
+will be
loaded into memory at once.
-d/usr/share/dict/stop
=item B<-m>I<REGEX>
Supply a different regular expression for deciding which symbols to re
+name.
The default value is /\A[[:lower:][:digit:]_]+\z/. Your expression mus
+t be
delimited by the '/' characters and you may not use that character wit
+hin the
expression. That shouldn't be an issue because '/' isn't valid in a sy
+mbol
name anyway.
-a/\A[[:lower:][:digit:]_]+\z/
=item B<-y>
print two B<YAML> documents to STDOUT instead of the deparsed source c
+ode.
The first document is a configuration document suitable for use with t
+he B<-c>
parameter. The second document is the deparsed source code. Use this f
+eature
to generate a configuration document for further, iterative reverse en
+gineering.
=item B<-c>I<FILENAME>
Supply a filename to a B<YAML> configuration file. Normally you would
+generate this
file by saving the results of the B<-y> parameter to a file. You can t
+hen edit the
file to provide your own names for symbols and not rely on the random
+symbol picker
in B<B::Deobfuscate>. You may create your own B<YAML> configuration fi
+le as well.
=back
=head1 CONFIGURATION FILE
The B::Deobfuscation symbol renamer can be controlled with by a config
+uration file.
Use of this feature requires the L<YAML> module be installed.
dictionary: '/usr/share/dict/propernames'
global_regex: '(?:)'
globals:
kSDsfDS: Slartibartfast
HGFdsfds: Triantaphyllos
lexicals:
'$SdfSd': '$No'
'$GsdDd': '$Ed'
'$Ksdfs': '$Ji'
The following keys are recognized:
=over 4
=item B<dictionary>
This is a filename path to the operative dictionary.
dictionary: /usr/share/dict/stop
=item B<global_regex>
This regular expression tests global symbols. Only symbols that match
+this
expression may be renamed. The default value is '\A[[:lower:][:digit:]
+_]\z/.
In perl, global symbols are independent of their sigil so the values b
+eing
tested are bare. Future versions of B::Deobfuscate may add the sigil t
+o the
symbol name.
global_regex: '\A[[:lower:][:digit:]_]\z'
=item B<globals>
This is a hash detailing symbol names as used in the original source a
+nd the
name used in the deobfuscated source. For example - if the original so
+urce
has a variable named @z12345 and you wish to rename all occurrances to
+
@URLList then the hash would associate 'z12345' with 'URLList'. The di
+ctionary
picker fills these values in automatically.
If you wish to prevent B::Deobfuscate from renaming a symbol then spec
+ify the
new value as '~' (which in YAML terms is undef).
globals:
catfile: ~
opt_n: ~
opt_t: ~
opt_u: ~
z1234567890: Postprocesser
z2345678901: Constructable
z3456789012: Photosynthesises
z4567890123: Undiscriminate
z5678901234: Parenthesises
z6789012345: Animadvertion
=item B<lexicals>
Lexicals is a hash exactly like `globals' except that all the symbol n
+ames
include the sigil which doesn't currently happen for globals.
lexicals:
'$k1234567890': '$ivs'
'$k2345678901': '$ehs'
'$k3456789012': '$ans'
'$k4567890123': '$ons'
'$k5678901234': '$ofs'
'$k6789012345': '$gos'
'$k7890123456': '$dus'
'$k8901234567': '$iis'
'$k9012345678': '$ats'
'$k0123456780': '$ets'
=back
=head1 AUTHOR
Joshua b. Jore <jjore@cpan.org>
=head1 SEE ALSO
L<B::Deparse>
L<http://www.perlmonks.org/index.pl?node_id=243011>
L<http://www.perlmonks.org/index.pl?node_id=244604>
=cut
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.