http://qs321.pair.com?node_id=605347
Category: Fun Stuff
Author/Contact Info Zach Morgan zpmorgan%$#gmail;,com
Description: This script (ab)uses the Google image search to find an image of adequate proportions based on a random word from your computer's dictionary file, then attempts to download it and set it as your desktop wallpaper. If you supply word arguments, the script will use them as the basis of your search instead.

This is originally based on Hossman's script that does the same thing, which in turn uses code that was part of WebCollage. (I couldn't find it just now.)

Whatever. A well oiled machine needs many ingredients.

Requires gnome2 to set the background. Could use help setting it for other systems.

EDIT:replacing dos-style newlines. how did those get there?

EDIT 2: try flickr mode now.

#!/usr/bin/perl -w

# much of this script is derived from webcollage...
# webcollage is Copyright (c) 1999-2005 by Jamie Zawinski <jwz@jwz.org
+>
# This program decorates the screen with random images from the web.
# One satisfied customer described it as "a nonstop pop culture brainb
+ath."
#
# Permission to use, copy, modify, distribute, and sell this software 
+and its
# documentation for any purpose is hereby granted without fee, provide
+d that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation.  No representations are made about the suitability of
+ this
# software for any purpose.  It is provided "as is" without express or
# implied warranty.

use warnings;
use strict;
use LWP;
use URI;

###########################################################################
# configuration options

# search backend: 'google' or 'flickr' (the --flickr switch selects flickr)
my $engine= 'google'; #/flickr
# location of the dictionary
my $dict = "/usr/share/dict/words";
# ideal number of URLs for a good random sample in rand word mode
# (the higher it is, the longer it takes to cycle)
my $rand_sample_size = 2; 
# ideal number of URLs to cycle through in specific word mode
# (the higher it is, the longer it takes to start)
my $specific_sample_size = 400; 
# optimal image size; if 0, defaults to screen size (detected via xdpyinfo)
my ($img_width, $img_height) = (0, 0);
# fudge factor: what size percentage diff images may have
# (set to 0 if you want only images that are exactly $img_width x $img_height)
my $fudge = 0.22;  # 0.22 means '800x600 ok for 1024x768'
# what type of images?   &PORN_OK or &FAMILY_ONLY ?
# original coder apparently wanted to prevent 'family photos', but it must not
#     be a huge problem with such random words
my $img_filter = &PORN_OK; 
# seconds that have to elapse between images
my $delay = 30;
# timeout value for the LWP User Agent
my $ua_timeout = 7;
# place to store the current bg img.
my $imgfile = "$ENV{HOME}/curr-rand-bg"; # ($ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp/") ."curr-rand-bg.$$";
# where debug messages go; set to undef to silence them
my $DEBUG_HANDLE = *STDOUT; # = *STDERR;
# command for displaying images in the root window (file name goes after)
my $root_cmd = "gconftool-2 -t string -s /desktop/gnome/background/picture_filename ";
    # command originally was this:
    #"xv -root -quit -viewonly +noresetroot -rmode 5"
    #.          "   -rfg black -rbg black -maxpect ";
    #or maybe xsetbg would be better..
my $archive=1; #true/false
my $archivedir= "$ENV{HOME}/random_images"; #where, if so
# file holding the current topic; archived images are named after it
my $topic_file= $archivedir . "/topic";
# file recording already-downloaded URLs, for dupe detection
my $id_file= $archivedir . "/records";
my $rotation= 0; #T/F

######################################################################
+####
# main code

# parse args: long options may appear anywhere; they (and their values)
# are spliced out of @ARGV so that whatever remains becomes search words
my $i=0;
while (defined $ARGV[$i]){
    while (defined $ARGV[$i] and $ARGV[$i] =~ /^--(.*)$/){ # -- detected
        my $opt = $1;
        my $next_arg= $ARGV[$i+1];
        if($opt eq 'help'){
            printhelp()}
        elsif($opt eq 'interval' or $opt eq 'delay'){
            $delay= $next_arg;
            # guard against a missing value as well as a non-numeric one
            die "--delay what?" unless defined $delay and $delay=~/^\d+$/;
            splice @ARGV, $i, 1;   # consume the value
        }
        elsif($opt =~ /^rotat/){   # --rotate / --rotation
            $rotation= 1;
        }
        elsif($opt eq 'safe'){
            $img_filter= 0;
        }
        elsif($opt eq 'size'){
            # previously a malformed value silently reused stale captures;
            # now it's a hard error
            die "--size WIDTHxHEIGHT expected"
                unless defined $next_arg and $next_arg =~ m|^(\d+)x(\d+)$|;
            $img_width= $1;
            $img_height= $2;
            splice @ARGV, $i, 1;   # consume the value
        }
        elsif($opt eq 'flickr'){
            $engine = 'flickr'
        }
        splice @ARGV, $i, 1;       # consume the flag itself
    }
    $i++
}
my $notrandom = join(' ', @ARGV); #all non-option args

if($engine eq 'flickr'){
    &init_interesting_flicker;
}

# pick the image size unless we already have one (from --size)
if (!$img_width || !$img_height) {
    which('xdpyinfo') || die "xdpyinfo not found on \$PATH -- you have to pick a size";
    $_ = `xdpyinfo`;
    ($img_width, $img_height) = m/dimensions: *(\d+)x(\d+) /;
    if (!defined($img_height)) {
        die "xdpyinfo failed -- you have to pick a size";
    }
}

# see that the tool to set the wallpaper exists...
$root_cmd =~ m|^(\S*)|;
my $wp_prog= $1;
unless (which($wp_prog)){
    die "$wp_prog no find. Check out the \$root_cmd setting."
}

# use the arrow form rather than indirect-object "new LWP::UserAgent"
my $ua = LWP::UserAgent->new(timeout => $ua_timeout);
$ua->agent("Lynx 2.5"); # some sites refuse unknown user agents

# now we get serious: clear out any leftover image, then loop forever
archive($imgfile) if $archive;

if($rotation){
    &rotateloop;
} else {
    &mainloop;
}

sub rotateloop{ #$rotation is not default.
    # Endlessly cycle the wallpaper through previously archived images,
    # starting at a random offset.  If keywords were given on the command
    # line, only archive files whose topic matches are shown.
    my @img_paths;
    for (<$archivedir/*>){
        if($notrandom){
            # match keywords literally -- \Q...\E so user input isn't
            # treated as a regex
            next unless m|\Q$archivedir/$notrandom\E|;
        }
        push @img_paths, $_ if m|\d$|; #int suffix means image.
    }
    die "I'm dying because there're no $notrandom images" unless scalar(@img_paths);
    my $start= int(rand() * scalar(@img_paths));
    while(1){
        for(@img_paths){
            # skip ahead to the random starting offset on the first pass
            if ($start){
                $start--;
                next;
            }
            unlink $imgfile;
            symlink($_, $imgfile) or &debug("symlink failed: $!\n");
            &debug("$_ symlinked to $imgfile\n");
            &show_next_img;
            # delay x seconds before we continue
            sleep_diff($delay);
        }
    }
} 

sub mainloop{
    # Default mode: forever pick search words (a random dictionary word,
    # or the words from the command line), gather candidate image URLs
    # from the configured engine, and set each acceptable not-yet-seen
    # image as the wallpaper, sleeping $delay seconds between changes.
    # Never returns.
    my $pagemark=0;   # google results-page cursor, threaded by reference
    my $words;
    while (1) {
        my $has_next=1;   # cleared (by reference) when google runs out of pages
        my @urls;
        if($engine eq 'google'){
            if($notrandom){
                $words = $notrandom;
            }
            else{ #random words
                $words = &random_word;
                $pagemark=0;   # new word: restart paging from the first page
            }
            &debug('word is '.$words."\n");
            # returns url => [w, h] pairs, captured straight into a hash
            my %urlmap = &get_n_google_images_by_size($words,
                              $img_filter,
                              $img_width, $img_height,
                              $fudge,
                              $rand_sample_size,
                              \$pagemark,
                              \$has_next);
            @urls = keys %urlmap;
            &debug(scalar(@urls) . " URLS for: $words\n");
            if (0 == scalar(@urls) or (&alltaken(\@urls) and 0 == $has_next)){
                # no more urls for this word(s): drop back to random words
                &debug("new random word\n");
                $notrandom= "";
                next;
            }
        }
        else{ #flickr
            $words= 'flickr';
            @urls= &get_interesting_urls(1);
            &debug("flickr gave ".scalar(@urls)." urls.\n");
            next unless(scalar(@urls));
        }
        #supersimple url shuffle: rotate the list by a random offset
        my @xurls= splice @urls, 0, rand(scalar(@urls));
        push @urls, @xurls;
        
        for my $url(@urls){
            # keep trying new random images from this set
            # until we get one we like, or we get sick of trying
            if (&detect_dupe($url)){
                &debug("Dupe: $url\n");
                if(&alltaken(\@urls)){
                    &debug("out of URLs, must hit $engine harder\n");
                    last;
                }
                next;
            }
            &debug("trying: $url\n");
            
            if (&save_page($url, $imgfile)){
                record_img_id($url);
                &set_cur_topic($words);
                &show_next_img or next;
                # delay x seconds before we continue
                sleep_diff($delay);
                archive($imgfile) if $archive;
            } else { # record the url even if the download failed,
                     # so we don't retry a dead link forever
                record_img_id($url);
            }
        }
    }
}
####################
##    FUNCTIONS    #
####################
sub set_cur_topic{
    # Record the current search words in $topic_file; archive() names
    # saved images after this topic.  No-op unless archiving is enabled.
    return unless $archive;
    unlink $topic_file;
    # three-arg open with a lexical handle; include $! in the error
    open(my $fh, '>', $topic_file) or die "can't open $topic_file: $!";
    print $fh shift;
    close $fh;
}

sub get_cur_topic{
    # Return the topic recorded by set_cur_topic, or a fallback string
    # when no topic file exists yet.
    unless (-e $topic_file){
        return ("something");
    }
    # three-arg open with a lexical handle; include $! in the error
    open(my $fh, '<', $topic_file) or die "can't open $topic_file: $!";
    my $t= <$fh>;
    close $fh;
    return $t;
}

#try to prevent dupes by recording the URLs
#(could be other unique identifiers)
my %ids;
sub record_img_id{
    # Remember an image id (here: its URL) so we never fetch it twice.
    # Appended to $id_file when archiving is on, so it survives restarts.
    my $id= shift;
    $ids{$id}=1;
    return unless $archive;
    open(my $fh, '>>', $id_file) or die "can't append to $id_file: $!";
    print $fh $id . "\n";
    close $fh;
}
sub detect_dupe{
    # True if this id was seen before, either in memory or in $id_file
    # from a previous run (the file is loaded lazily on first call).
    my $id= shift;
    unless (%ids){ #need to parse id file
        # debug() always returns true, so this reads: log and bail out
        &debug("id file not found\n") and return 0 unless -e $id_file;
        open(my $fh, '<', $id_file) or die "can't read $id_file: $!";
        my @lines= <$fh>;
        close($fh);
        %ids= map {chomp($_);$_ => 1} @lines
    }
    return exists $ids{$id} ? 1 : 0;
}
sub alltaken{
    #nothing left to download? true if every url in the arrayref
    #has already been recorded.
    my $ids= shift;
    for my $id (@$ids){
        return 0 unless $ids{$id};
    }
    return 1; #we (you and i) must have recorded all these @$ids.
}

 #############
 # net stuff #
###############

### FLICKR ###

sub init_interesting_flicker{
    # Flickr mode needs Data::Random's rand_date(), which itself needs
    # Date::Calc.  Load Data::Random here; on failure, report which of
    # the two dependencies is missing, then die.
    eval q{ use Data::Random qw(:all)};
    if ($@){
        print "Missing Data::Random\n";
        # was "use Date/Calc.pm" -- a syntax error inside the eval that
        # made the Date::Calc diagnostic fire even when it was installed
        eval q{ use Date::Calc };
        print "Missing Date::Calc\n" if $@;
        die;
    }
}
    
sub pick_interesting_day{
    # Return a random date (YYYY-MM-DD) within the span of Flickr's
    # "interestingness" archive: 2004-07-01 through today.  The caller
    # builds urls like:
    #   http://www.flickr.com/explore/interesting/2004/07/01/page38/
    my $day = rand_date( min => '2004-07-01', max => 'now' );
    # rand_date needs Date::Calc under the hood; undef means it's absent
    die "Missing Date::Calc" unless $day;
    return $day;
}



sub get_interesting_urls{
    # @_ = number of urls wanted
    #
    # Use the Flickr "interestingness" archive to find wallpaper
    # candidates: pick a random day/page, scrape the photo ids off it,
    # and for each photo choose the available size that best fits the
    # screen (decide_best_fit).  Returns a list of direct image urls.
    my $n= shift;
    my @urls= ();
    
    until (scalar(@urls) >= $n){
        my $date= &pick_interesting_day;
        $date=~ s|-|/|g;   # 2004-07-01 -> 2004/07/01 for the url
        my $page= 1+ int(rand()*50);
        &debug("random date is $date, page is $page\n");
        my $F_url= 'http://www.flickr.com/explore/interesting/'. $date .'/page'. $page;
        # guard each fetch -- previously a failed fetch fed undef to the regex
        my $doc= get_page($F_url) or next;
        while($doc =~ m|DayPic\">\s*<a href=\"\S+/(\d+)/\"|g){
            my $photo_page_url= 'http://www.flickr.com/photo_zoom.gne?id=' . $1;
            my $photo_doc= get_page($photo_page_url) or next;
            # collect the advertised sizes: size-code => [w, h]
            my %img_sizes= ();
            while($photo_doc =~ m|;size=(..?)\"\D+(\d+) x (\d+)|g){
                $img_sizes{$1}= [$2, $3];
            }
            my $size= &decide_best_fit (\%img_sizes);
            next unless $size; #bad fit
            $photo_page_url .= '&size='. $size;
            # same page, but with the chosen size's direct image link on it
            $photo_doc= get_page($photo_page_url) or next;
            # this match was duplicated before; a single guarded match
            # avoids pushing a stale $1 when it fails
            next unless $photo_doc =~ m|a href="(http://farm[^\"]+)"|;
            push @urls, $1;
            last if scalar(@urls) >= $n;
        }
    }
    return @urls;
}
sub decide_best_fit{
    # @_ = \%sizes, a hashref of { size_code => [width, height] }
    #
    # Score each candidate size by how far it diverges from the screen's
    # width, height and aspect ratio (the ratio term weighted 2.5x), and
    # return the code with the smallest divergence.  Returns 0 (false)
    # when nothing scores under the cutoff of 1.
    my ($sizes)= @_;
    my $screen_ratio= $img_width / $img_height;
    my ($best, $best_div)= (0, 1);
    while (my ($code, $dims)= each %$sizes){
        my ($w, $h)= @$dims;
        my $div = abs(($img_width  - $w) / $img_width)
                + abs(($img_height - $h) / $img_height)
                + 2.5 * abs(($screen_ratio - $w / $h) / $screen_ratio);
        if ($div < $best_div){   # better fit
            ($best, $best_div)= ($code, $div);
        }
    }
    return $best;
}

### GOOGLE ###

sub get_n_google_images_by_size {
    # @_ = $search_term, $mature, $width, $height, $fudge, $n, $$page_mark, $$has_next
    #
    # give some search terms, and some size preferences, will get
    # successive google image results pages until it finds at least
    # $n images that meet the criteria (or as many as it can find).
    #
    # $mark is a reference to the caller's page cursor, so successive
    # calls for the same word resume where the last one stopped;
    # $has_next is a reference that is cleared when google reports no
    # further results page.
    #
    # returns a hash of { $url => [$w, $h] }
    my ($q, $mature, $w, $h, $f, $n, $mark, $has_next) = @_;
    
    # results isn't a hash, but this makes it easy to pool things
    my @results = (); # just remember to divide by 2 for useful sizing
    
    my $page = $$mark * 20;
    while (scalar @results / 2 < $n) {
        # get the page
        my $doc = &get_google_page($q, $mature, $page);
        
        # maybe should get out now if there's a problem
        # NOTE(review): a persistently failing fetch retries forever here
        &debug("trying google again\n") and next unless defined $doc;
        
        push @results, &cut_by_size($w, $h, $f,
                        &parse_google_img_links($doc));    
        
        unless($doc =~ m|Next</a>|){
            $$has_next = 0;
            last;
        }
        $page += 20; # 20 items per page
        $$mark++;
    }
    return @results;
}

sub cut_by_size {
    # @_ = $width, $height, $fudge, %url_to_size
    #
    # Filter %url_to_size down to entries whose dimensions fall within
    # +/- $fudge (a fraction) of the requested $width x $height; with a
    # $fudge of 0 only exact matches survive.  As a special case the two
    # most common 4:3 screens accept the next size up (1024-wide screens
    # allow up to 1280, 768-tall screens allow up to 1024).
    #
    # Returns the surviving subset as a hash of { $url => [$w, $h] }.
    my ($width, $height, $fudge, %url_to_size) = @_;

    my $slack_w = $width * $fudge;
    my $slack_h = $height * $fudge;
    my ($min_w, $max_w) = ($width - $slack_w, $width + $slack_w);
    my ($min_h, $max_h) = ($height - $slack_h, $height + $slack_h);
    #support a common size:
    $max_w = 1280 if int($width) == 1024;
    $max_h = 1024 if int($height) == 768;

    my %kept;
    while (my ($url, $dims) = each %url_to_size) {
        my ($w, $h) = @$dims;
        next if $w < $min_w or $w > $max_w;
        next if $h < $min_h or $h > $max_h;
        $kept{$url} = $dims;
    }
    return %kept;
}

sub get_page {
    # @_ = $uri (URI object or string)
    #
    # Fetch the URI using the global $ua, sending the site's own root as
    # the Referer header so picky hosts don't serve an error image.
    # Returns the response body, or undef on failure.
    my $uri = shift;
    $uri = URI->new($uri) unless ref $uri;

    # derive a plausible referrer: the root of the same site
    my $referrer = $uri->clone();
    $referrer->path("/");
    $referrer->query(undef);

    my $request = HTTP::Request->new(GET => $uri);
    $request->referer($referrer->as_string());
    my $response = $ua->request($request);
    unless ($response->is_success) {
        &debug("FAILED: " . $response->code() . " " . $uri->as_string() . "\n");
        return undef;
    }
    &debug("WTF? success, but undef\n") unless defined $response->content();
    return $response->content();
}

sub save_page {
    # @_ = $uri (URI object or string), $file (string)
    #
    # saves the contents of $uri into $file
    # returns true if everything is kosher, or false if there was a problem
    my ($uri, $file) = @_;
    
    my $content = get_page($uri);
    return 0 unless defined $content;

    # three-arg open with a lexical handle; debug() returns true, so the
    # "or ... and return 0" chain logs and bails on failure
    open(my $fh, '>', $file) or &debug("can't open $file: $!\n") and return 0;
    binmode $fh;   # images are binary; don't mangle them on CRLF platforms
    print $fh $content;
    close $fh or &debug("can't close $file: $!\n") and return 0;
    return 1;
}

sub get_google_page {
    # @_ = $search_term, $mature, $startnum
    #
    # Fetch one google image results page for $search_term, beginning at
    # result number $startnum.  When $mature is true, safe search is
    # turned off.  Returns the raw page body (or undef on failure).
    my ($q, $mature, $start) = @_;

    my $gurl = URI->new('http://images.google.com/images');
    $gurl->query_form(
        'q'       => $q,
        'start'   => $start,
        'imgsafe' => ($mature ? 'off' : 'on'),
        'imgsz'   => 'xxlarge',
    );
    return get_page($gurl);
}

sub parse_google_img_links {
    # @_ = $doc
    #
    # Given the body of a google image results page, pull every
    # /imgres? link out of it and return a hash of { $url => [$w, $h] }.
    my ($doc) = @_;
    my %links;

    while ($doc =~ m|(/imgres\?[^>]*)\"?|g) {
        my %params = URI->new($1)->query_form();

        # bare hostnames get a scheme prepended
        my $img = $params{'imgurl'};
        $img = "http://" . $img unless $img =~ m|^[a-z]{1,5}://|;
        $links{$img} = [ $params{'w'}, $params{'h'} ];
    }
    return %links;
}

# End of web stuff

sub archive{
    # @_ = $file (optional; defaults to the global $imgfile, which is
    #      what every existing caller passes -- the parameter used to be
    #      silently ignored)
    #
    # Move $file into $archivedir, named after the current topic plus an
    # incrementing integer suffix.  Symlinks (left behind by rotation
    # mode) and non-binary files (failed downloads, usually an html
    # error page) are deleted instead of archived.
    my $file = shift;
    $file = $imgfile unless defined $file;

    if(-l $file){
        &debug("not archiving symlink\n");
        unlink $file;
        return;
    }
    unless(-e $archivedir){
        # best-effort: archiving is skipped below if the dir never appears
        mkdir($archivedir) or warn "can't mkdir $archivedir: $!\n";
    }
    unless (-B $file){
        # also hit when $file doesn't exist at all (e.g. first run)
        &debug("$file is not binary\n");
        unlink $file;
        return 0;
    }
    if(-e $archivedir){
        die "$archivedir not a writable dir..." unless -d $archivedir && -w $archivedir;
        # find the first unused integer suffix for this topic
        my $i=0;
        my $filepath= $archivedir .'/'. get_cur_topic();
        $i++ while -e ($filepath . $i);
        &debug("attempting mv of $file to ". $filepath . $i . "\n");
        warn "unable to mv\n" unless rename($file, $filepath . $i);
    }
}

sub show_next_img {
    # Point the desktop background at $imgfile by running $root_cmd.
    # Returns 1 on success, 0 when the file is missing/non-binary or the
    # wallpaper command fails (callers rely on this: "&show_next_img or next").
    &debug("no $imgfile\n") and return 0 unless -f $imgfile;
    &debug("$imgfile is not binary\n") and return 0 unless -B $imgfile;

    # the exit status used to be ignored even though callers test our
    # return value
    if (system("$root_cmd $imgfile") != 0) {
        &debug("'$root_cmd $imgfile' failed: $?\n");
        return 0;
    }
    &debug("displaying $imgfile in root window\n");
    return 1;
}

sub random_words {
    # @_ = $num
    #
    # Returns a list of $num random dictionary words (entries may be
    # undef when the lookup fails).
    my ($num) = @_;
    my @words;
    push @words, random_word() for 1 .. $num;
    return @words;
}

sub random_word {
    # Pick one random word from the dictionary file by seeking to a
    # random byte offset, discarding the partial line there, and keeping
    # the next full line.  The word is lightly normalized (lowercased,
    # common plural/adverb suffixes stripped) and quoted when it contains
    # whitespace.  Returns undef when the file can't be read; dies when
    # $dict is missing entirely.
    #
    # from webcollage

    die "no dictionary: $dict" unless -f $dict;

    my $word = 0;
    if (open my $fh, '<', $dict) {
        my $size = -s $fh;
        if (seek $fh, rand($size), 0) {
            my $toss = <$fh>;   # partial line at the seek point
            $word = <$fh>;      # keep the next full line
        }
        unless ($word) {
            # seeked into the final line: wrap around to the start
            seek $fh, 0, 0;
            $word = <$fh>;
        }
        close $fh;
    }

    return undef if !$word;

    # trim whitespace, then crudely de-pluralize / de-adverbialize
    $word =~ s/^[ \t\n\r]+//;
    $word =~ s/[ \t\n\r]+$//;
    $word =~ s/ys$/y/;
    $word =~ s/ally$//;
    $word =~ s/ly$//;
    $word =~ s/ies$/y/;
    $word =~ s/ally$/al/;
    $word =~ s/izes$/ize/;
    $word =~ s/\'s$//;
    $word =~ tr/A-Z/a-z/;

    # multi-word entries get quoted so they search as a phrase
    $word = qq("$word") if $word =~ /\s/;

    return $word;
}


sub which {
    # Search each directory on $PATH for an executable named $prog.
    # Returns $prog when found, undef otherwise.
    my ($prog) = @_;
    for my $dir (split /:/, $ENV{PATH}) {
        return $prog if -x "$dir/$prog";
    }
    return undef;
}

#note: no 'diff' anymore
sub sleep_diff {
    # @_ = $delay
    #
    # Announce the wait on the debug handle, then pause that many seconds.
    my ($seconds) = @_;
    &debug("waiting $seconds seconds\n");
    sleep $seconds;
}

sub debug {
    # Print its arguments to $DEBUG_HANDLE, if one is defined.
    # Always returns true, so it can be chained: debug(...) and return 0;
    if (defined $DEBUG_HANDLE) {
        print $DEBUG_HANDLE @_;
    }
    return 1;
}

sub PORN_OK { 1 }

sub printhelp{
    # Print usage to STDOUT and exit.  The heredoc interpolates the
    # current $delay default; \@ARGV is escaped so it prints literally.
    print <<ENDHELP;
Hellow theres..
This script (ab)uses the google image search to find an image of 
adequate proportions from a random word from your computers 
dictionary file, and attempts to download it and set it as your
desktop wallpaper. If you supply words arguments, then this script
will use them as the basis of your search.

Usage:
random-bg.pl --delay 3600
random-bg.pl jackie chan --delay 4
random_bg.pl --rotate --delay 3
random_bg.pl --flickr --delay 8

If you give it words in \@ARGV, it will continually show images
that match those words, and exit when it runs out.

Options for this script:
 
 --help
buh...

 --safe
have google filter your searches, 
for when you should be working.
 
 --rotate
 --rotation
endless loop from files in your archive. If no 
topic is specified, every file will be displayed.

 --interval INT
 --delay INT
The amount of time to delay between each
new image. The default delay is $delay.

 --flickr
Use flickr to find interesting photos.
This mode is unlikely to set a graph or map
This mode requires Data::Random and Date::calc

See the "configuration options" section in the code for 
variables you can adjust to further customize the behavior.


Credits:
* hossman wrote most of this script in 2002.
* snarius rewrote most of the interface and
   added some features in 2007.
ENDHELP
    exit;
}

sub dump_die{
    # Debug helper: dump its argument to a scratch file for post-mortem
    # inspection, then die.  (Originally wrote to a hard-coded path under
    # /home/zach, which fails for everyone else.)
    my $dumpfile = ($ENV{TMPDIR} || '/tmp') . '/random_bg_dump';
    open(my $fh, '>', $dumpfile) or die "can't open $dumpfile: $!";
    print $fh shift;
    close $fh;
    die;
}