# PerlMonks::Mechanized # and its helper class 'PM::Mech::Janitor'. # Start by creating a PerlMonks::Mechanized->new() object. # If needed, login info can be stored in $ENV{PMPASS}, $ENV{PMUSER}. # To override default site URL, set $ENV{PMSITE} to full URL. # Janitor object is returned by the PerlMonks::Mechanized->janitor() # method. Don't call Janitor->new() directly. # Read on, for details. package PerlMonks::Mechanized::Janitor; use strict; use warnings; use WWW::Mechanize; # Needs a new WWW::Mech object in order to # implement rollback / commit. # Janitor class: Returned by janitor() method of PM::Mech. First # call must be to $janitor->fetch(). After that initial hit, no # server hits will occur until $janitor->commit() is invoked. # You may spawn multiple janitors, for full cached transactions. # Each Janitor invokes a new WWW::Mechanize object. # Returns a Janitor object. sub new { my( $class, $monk, $id ) = @_; my $self = {}; $self->{monk} = $monk; $self->{id} = $id; $self->{agent} = WWW::Mechanize->new( 'autocheck' => 1, 'agent' => 'PM::Mech::Janitor0.6' ); $self->{site} = $self->{monk}{site}; $self->{fetched} = 0; return bless $self, $class; } # If passed the argument of "unconsider", the target node will be # unconsidered. # Must fetch() before using any of the other methods in this class. # No return value. sub fetch { my( $self, $unconsider ) = @_; my $uncon_URI = ''; if( defined( $unconsider ) and $unconsider =~ /^Un/i ) { $uncon_URI = ";op=consider;" . "$self->{id}=unconsider"; } $self->{agent}->get( $self->{site} . '?node_id=' . $self->{id} . ';displaytype=editors' . $self->{monk}->_login_URI('force') . $uncon_URI ); $self->{agent}->success() or die "Unable to fetch Janitors view of $self->{id}.\n"; $self->{fetched} = 1; } sub get_title { my $self = shift; _verify_fetch( $self ); my $form = $self->{agent}->form_name( 'edit_node' ); $form or die "Couldn't find 'edit_node' form in get_title.\n"; return $form->value( 'update_title', 1 ); } sub set_title { my( $self, $title ) = @_; _verify_fetch( $self ); $self->{agent}->field( 'update_title', $title ); } sub get_author { my $self = shift; _verify_fetch( $self ); my $content = $self->{agent}->content(); my $author = ''; if( $content =~ m/\s+by\s+([^<]+)<\/a>/i ) { $author = $1; } else { die "Couldn't ascertain the author while scraping the " . "editor view of ID: $self->{id}.\n"; } return $author; } sub get_doctext { my $self = shift; _verify_fetch( $self ); my $form = $self->{agent}->form_name( 'edit_node' ); $form or die "Couldn't find 'edit_node' form in get_doctext.\n"; return $form->value( 'update_doctext', 1 ); } sub set_doctext { my( $self, $text ) = @_; _verify_fetch( $self ); $self->{agent}->field( 'update_doctext', $text ); } # Commits the changes made. The only change that cannot be rolled- # back is the "unconsider" change. Sorry 'bout that. ;) sub commit { my $self = shift; _verify_fetch( $self ); $self->{agent}->current_form->value( 'blah', 'update' ); $self->{agent}->click( 'blah' ); $self->{agent}->success() or die "Couldn't commit changes to $self->{id}.\n"; } # Private Janitor class function. sub _verify_fetch { my $self = shift; die "ID: $self->{id} hasn't been fetched yet.\n" unless $self->{fetched}; } 1; package PerlMonks::Mechanized; # This is your starting point. ...Create a new # PerlMonks::Mechanized object, and have fun with it. # Logins are not performed unless they are needed for the activity # you're requesting. If a login is needed, it will be done # automatically if you passed ( user, password ) to new(), or if # $ENV{PMUSER} and $ENV{PMPASS} are set. Logins are automatic, and # on-demand. However, once logged in, you stay logged in until # your PM::Mech object is destroyed. use strict; use warnings; use WWW::Mechanize; use XML::Simple; our $SITE = exists( $ENV{PMSITE} ) ? $ENV{PMSITE} : 'http://www.perlmonks.org/'; # Call new() to create PM::Mech object. Call with ( $user, $pass ) # to log in on demand, or set $ENV{} variables for on-demand login. # If no login info is supplied through new() or $ENV, you can only # do things that don't require login. You don't need to explicitly # log in. If you have supplied the proper info, it will happen when # needed, transparently. sub new { my $class = shift; my $obj = {}; $obj->{user} = defined( $_[0] ) ? $_[0] : defined( $ENV{PMUSER} ) ? $ENV{PMUSER} : ''; $obj->{passwd} = defined( $_[1] ) ? $_[1] : defined( $ENV{PMPASS} ) ? $ENV{PMPASS} : ''; $obj->{logged_in} = 0; $obj->{login_phrase} = ( $obj->{user} && $obj->{passwd} ) ? ";op=login;user=$obj->{user};" . "passwd=$obj->{passwd};expires=+10y" : ''; $obj->{site} = $SITE; $obj->{agent} = WWW::Mechanize->new( 'autocheck' => 1, 'agent' => 'PM::Mech0.61' ); bless $obj, $class; } # Given a base thread ID, returns a datastructure containing # The thread's ID's. Uses the XML ticker, "xml node thread", at # id://180684. sub threaded_ids { my( $self, $base ) = @_; $self->{agent}->get( $self->{site} . "?node_id=180684;id=$base" ); $self->{agent}->success() or die "Unable to fetch thread ticker for id = $base.\n"; my $struct = XMLin( $self->{agent}->content(), ForceArray => 1, KeepRoot => 1 ); return $struct; } # Given a base thread ID, returns a flat list of thread ID's. # Uses the XML ticker, "xml node thread", at id://180684. sub thread_list { my( $self, $base ) = @_; my $structref = threaded_ids( $self, $base ); return [ sort { $a <=> $b } _flatten_thread( $structref ) ]; } # Returns a datastructure containing info about a node or list of # nodes. Uses the XML ticker, "Node Query XML Generator", # at id://37150. Accepts a node id or list of nodes. sub node_info { my( $self, @ids ) = @_; $self->{agent}->get( $self->{site} . "?node_id=37150;nodes=" . join( ',', @ids ) . ';xmlstyle=flat' ); $self->{agent}->success() or die "Unable to fetch node query XML generator.\n"; return XMLin( $self->{agent}->content(), ForceArray => 1 )->{node}; } # Calls get_node_info() with a single ID or list of nodes. # Returns an array of arrays holding id/title pairs. Relies on # "Node Query XML Generator", at id://37150. sub node_titles { my( $self, @ids ) = @_; my $info = node_info( $self, @ids ); return [ map { [ $_->{node_id}, $_->{content} ] } @{ $info } ]; } # Grab user stats. Uses the XP XML Ticker (id://16046). # See the PM FAQ for details about valid args, and their meanings. sub user_stats { my( $self, %params ) = @_; my $parameters = ''; foreach( ( 'for_user', 'showlevels', 'for_userid','shownorm', 'showall' ) ) { $parameters .= exists( $params{$_} ) ? ";$_=$params{$_}" : ''; } $self->{agent}->get( $self->{site} . "?node_id=16046;xmlstyle=flat" . $parameters . $self->_login_URI() ); $self->{agent}->success() or die "Unable to fetch user stats ticker.\n"; return XMLin( $self->{agent}->content(), ForceArray => 1 ); } # Reads the New Chatterbox XML Ticker (id://207304) and returns a # ref to a LoL structure of CB traffic. sub chatterbox { my $self = shift; $self->{agent}->get( $self->{site} . "?node_id=207304" ); $self->{agent}->success() or die "Unable to fetch Chatterbox content XML generator.\n"; return XMLin( $self->{agent}->content(), ForceArray => 1 )->{message}; } # Talks in the CB. Messages can't be longer than 250 characters. sub say { my( $self, $message ) = @_; if( length( $message ) > 250 ) { $message = substr $message, 0, 250; } $self->{agent}->get( $self->{site} . '?' . $self->_login_URI . ';node_id=16046;op=message;message=' . $message ); $self->{agent}->success() or die "Unable to talk in the CB.\n"; } # Returns logged-in user's private messages in a datastructure. # Uses the Private Message XML Ticker (id://15848). See # node_id=379320 for information on how the parameter fields work. sub private_message { my( $self, %params ) = @_; my $parameters = ''; foreach( ( 'max_recs', 'since_id', 'prior_to', 'archived' ) ) { $parameters .= exists( $params{$_} ) ? ";$_=$params{$_}" : ''; } $self->{agent}->get( $self->{site} . "?node_id=15848;xmlstyle=clean" . $parameters . $self->_login_URI() ); $self->{agent}->success() or die "Unable to fetch Private Message ticker.\n"; return XMLin( $self->{agent}->content(), ForceArray => 1 )->{message}; } # Reads the 'Other Users XML Ticker' (id://15851) and returns a ref # to a list of other users currently logged in to the Monastery. sub other_users { my $self = shift; $self->{agent}->get( $self->{site} . "?node_id=15851" ); $self->{agent}->success() or die "Unable to fetch Other Users XML generator.\n"; return XMLin( $self->{agent}->content(), ForceArray => 1 )->{user}; } # Uses the 'displaytype=xml;xmlstyle=flat' ticker to grab an entire # single node and any available related info for that node. The # data is plopped into a datastructure that mirrors the original # XML tags, which in turn, mirror PM database columns. sub node_content { my( $self, $id ) = @_; $self->{agent}->get( $self->{site} . "?node_id=$id;displaytype=xml;xmlstyle=flat" ); $self->{agent}->success() or die "Unable to fetch node ID: $id\n"; return XMLin( $self->{agent}->content() ); } # Uses the Scratchpad Viewer's XML displaytype (id://108949) to get # a user's scratchpad. An attempt will be made to log self in, if # possible. If the logged-in user is the same as the user who's pad # we're retrieving, the private portion will also be retrieved. # This returns a datastructure. sub scratchpad { my( $self, $pad ) = @_; $pad = ( defined $pad ) ? $pad : $self->{user}; $self->{agent}->get( $self->{site} . "?node_id=108949;user=$pad;passthrough=1" . ';displaytype=xml;xmlstyle=flat' . $self->_login_URI() ); $self->{agent}->success() or die "Unable to fetch scratchpad for $pad.\n"; return XMLin( $self->{agent}->content(), ForceArray => 1 ); } # This sub uses the Newest Nodes XML Generator (id://30175) to get # a list of newest nodes. See the PerlMonks FAQ for a description # of what "types=" options you have. You may optionally specify # whether to use xmlstyle=flat (default) or xmlstyle=rss. # You may specify sinceunixtime=epocseconds, or days=decimal to # get up to 8 days worth of newest nodes. # "types" should be passed in as 'types=>[type,type,type]' # Optional params should be passed as a hashref. # Currently no validity checking is really done on params passed # to the method. sub newest_nodes { my( $self, %params ) = @_; $self->{agent}->get( $self->{site} . '?node_id=30175;xmlstyle=' . ( exists( $params{xmlstyle} ) ? $params{xmlstyle} : 'flat' ) . ( exists( $params{days} ) ? ';days=' . $params{days} : '' ) . ( exists( $params{sinceunixtime} ) ? ';sinceunixtime=' . $params{sinceunixtime} : '' ) . ( exists( $params{types} ) ? ';types=' . join( ',', @{$params{types}} ) : '' ) ); $self->{agent}->success() or die "Unable to fetch newest nodes.\n"; return XMLin( $self->{agent}->content(), ForceArray => 1 ); } # This sub fetches the displaytype=editors view of the node # indicated in $id. It returns an object of class Janitors with # the following methods: # fetch(), get_title(), set_title(), get_author(), get_doctext(), # set_doctext(), and commit(). You must always fetch() first, and # after that, the rest of the methods will have relevancy. sub janitor { my( $self, $id ) = @_; return PerlMonks::Mechanized::Janitor->new( $self, $id ); } # Private class subs. Please don't use these externally. # Called by methods that need the user to be logged in. # If the user is already logged in, this sub returns empty string. # If user isn't logged in, and it is possible to do so, this sub # returns a URI suffix to log the user in. sub _login_URI { my( $self, $independant_agent ) = @_; my $login = ''; my $logged_in = $self->{logged_in}; # Save old state. my $independant; # Flag for independant agent. if( defined( $independant_agent ) and $independant_agent ) { $self->{logged_in} = 0; $independant = 1; } if( $self->{logged_in} == 0 and $self->{login_phrase} ) { $login = $self->{login_phrase}; $self->{logged_in} = 1; } if( $independant ) { # if this is an independant agent, restore # original login flag state. $self->{logged_in} = $logged_in; } return $login; } # Used by get_thread_list() to flatten return value from # get_thread_ids(). sub _flatten_thread { my @nodes; foreach my $key ( keys %{$_[0]} ) { if ( ref( $_[0]->{$key} ) ) { push @nodes, _flatten_thread( $_[0]->{$key} ); } if ( $key =~ m/^\d+$/ ) { push @nodes, $key; } } return @nodes; } 1;