#!/usr/bin/perl
# This script is meant to be used just for the first run: it walks every
# directory listed in config.yml and inserts one row per accepted entry into
# the "checksums" table of checksum_db.sqlite.
use strict;
use warnings;
use File::Find;
use YAML qw/LoadFile/;
use Data::Dumper;
use Digest::SHA1 qw/sha1_hex/;
use DBI;
use DateTime;

# Fail early with a readable message instead of crashing later on an
# undefined database handle.
my $dbh = DBI->connect("dbi:SQLite:dbname=checksum_db.sqlite", "", "")
    or die "Cannot connect to checksum_db.sqlite: $DBI::errstr\n";

my $config_path = 'config.yml';
my $config      = LoadFile($config_path);
die "$config_path does not contain a mapping at its top level\n"
    unless ref($config) eq 'HASH';

# Insert one row describing $name into the checksums table.
#
# Parameters:
#   $checksum        - SHA-1 hex digest ('' when no digest was computed)
#   $last_modif_time - object whose ->ymd is stored (process_file passes a
#                      DateTime built from the entry's mtime)
#   $size            - size of the entry as determined by the caller
#   $name            - path of the entry
#
# The is_dir/is_file/is_link columns record what kind of entry the name
# column stands for.
#
# Returns whatever $dbh->do returns.
sub add_to_db {
    my ($checksum, $last_modif_time, $size, $name) = @_;

    # TODO: maybe calculating the is_* flags should be done in process_file.
    my $is_dir  = (-d $name) ? 'Y' : 'N';
    my $is_file = (-f $name) ? 'Y' : 'N';
    my $is_link = (-l $name) ? 'Y' : 'N';

    # Placeholders keep paths containing quotes (or SQL) from corrupting the
    # statement; the values are never spliced into the SQL text.
    return $dbh->do(
        'INSERT INTO checksums'
            . ' (checksum,size,last_date_modified,name,is_dir,is_file,is_link)'
            . ' VALUES (?,?,?,?,?,?,?)',
        undef,
        $checksum, $size, $last_modif_time->ymd, $name,
        $is_dir, $is_file, $is_link,
    );
}

# Remove the row(s) for $name from the database.
# TODO: remains to be completed; currently does nothing.
sub delete_from_db {
    my ($name) = @_;
    return;
}

# Compute the SHA-1 hex digest of a file's contents.
#
# Parameters:
#   $file - path to hash
#
# Returns:
#   the hex digest for a readable regular file;
#   ''    for a directory or any other non-regular entry (nothing to hash,
#         and opening a FIFO or device could block);
#   undef (after a warning) when the file cannot be opened.
sub file2sha1 {
    my ($file) = @_;

    return '' unless -f $file;

    my $fh;
    unless (open $fh, '<', $file) {
        warn "Cannot open '$file' for reading: $!\n";
        return;
    }
    binmode $fh;    # hash raw bytes, independent of platform line endings

    my $sha1 = Digest::SHA1->new;
    $sha1->addfile($fh);
    close $fh;

    return $sha1->hexdigest;
}

# File::Find "wanted" callback body: decide whether the current entry
# ($File::Find::name) should be recorded and, if so, add it to the database.
#
# Parameters:
#   $dir_configs - the config.yml entry for the directory being searched;
#                  the keys read here are dir, link, file and regex.
#
# Also reads the global $config->{minsize}. Returns nothing.
sub process_file {
    my ($dir_configs) = @_;
    my $path = $File::Find::name;

    # Entry kinds that the configuration does not ask for are ignored.
    # An ignored directory is not pruned, so its contents are still visited.
    return if -d $path && !$dir_configs->{dir};
    return if -l $path && !$dir_configs->{link};
    return if -f $path && !$dir_configs->{file};

    # One stat call supplies both size and mtime. It fails for dangling
    # symlinks and entries that vanished mid-walk; skip those instead of
    # handing an undefined epoch to DateTime.
    my @stat = stat $path;
    unless (@stat) {
        warn "Cannot stat '$path': $!\n";
        return;
    }
    my ($size, $mtime) = @stat[7, 9];

    my $minsize = defined $config->{minsize} ? $config->{minsize} : 0;
    return if $size < $minsize;

    # A missing or empty regex accepts everything. It must not reach the
    # match operator, where an empty pattern means "reuse the last
    # successful pattern".
    my $regex = $dir_configs->{regex};
    if (defined $regex && length($regex) && $path !~ /$regex/) {

        # A directory that does not match the regex is not descended into.
        $File::Find::prune = 1 if -d $path;
        return;
    }

    my $checksum = file2sha1($path);
    return unless defined $checksum;    # unreadable file, already warned

    my $last_modif_time = DateTime->from_epoch(epoch => $mtime);
    add_to_db($checksum, $last_modif_time, $size, $path);
    return;
}

my $directories = $config->{directories};
for my $searched_dir_hash (@{ ref($directories) eq 'ARRAY' ? $directories : [] }) {
    my $root = $searched_dir_hash->{path};

    # We skip the entry if it does not exist or it is not a directory.
    next unless defined $root && -d $root;

    # We pass to process_file the yml configuration for the directory that is
    # currently being searched. no_chdir keeps the working directory fixed so
    # that $File::Find::name stays valid for the file tests even when the
    # configured path is relative.
    find(
        {
            wanted   => sub { process_file($searched_dir_hash) },
            no_chdir => 1,
        },
        $root,
    );
}