package PerlMonks::Mechanized::Data; # PerlMonks::Mechanized::Data standardizes the data structure returned # by the PM::Mech user_nodes, node_info, node_content, and node_titles # methods. The add_node_data routine takes as input the ref returned # from those methods (output from XML::Simple) and a ref # to a master hash. # The structure of the master hash is shown below. #$alldata_ref: # $node_id => # { # 'node_id' => '466017', # 'root_node' => '466016', # 'parent_node' => '466016', # # 'author_user' => '333489', # 'author_name' => 'muba', # # 'title' => 'Re: regex for word puzzle', # 'content' => (node text), # 'reputation' => '17', # 'nodetype' => 'note', # # 'createtime' => '20050612204931', # 'created' => '2005-06-12 20:49:31', # 'lastupdate' => '', # 'lastedit' => '20050407145724' # } use strict; use warnings; use Carp qw( carp ); use Data::Dumper; use Exporter; our @ISA = ("Exporter"); #our @EXPORT = (); our @EXPORT_OK = qw( add_node_data ); our $VERSION = 0.01; #********************************************************************* my %integrate = ( user_nodes => \&_add_user_nodes, node_info => \&_add_node_info, node_content => \&_add_node_content, node_titles => \&_add_node_titles ); sub add_node_data { my ( $newdata_ref, $alldata_ref ) = @_; # $newdata_ref = ref to the output from the PM::Mech methods # $alldata_ref = ref to the master hash containing all node data my $datatype = _determine_data_type( $newdata_ref ); if( not defined $datatype ) { return 1; } $integrate{$datatype}->( $newdata_ref, $alldata_ref ); return 0; } #********************************************************************* sub _determine_data_type { my ( $newdata_ref ) = @_; if( ref( $newdata_ref ) eq 'HASH' && exists $newdata_ref->{INFO} && exists $newdata_ref->{NODE} ) { return 'user_nodes'; } elsif( ref( $newdata_ref ) eq 'HASH' && exists $newdata_ref->{title} && exists $newdata_ref->{author} ) { return 'node_content'; } elsif( ref( $newdata_ref ) eq 'ARRAY' && ref( $newdata_ref->[0] ) eq 'HASH' ) { return 'node_info'; } elsif( ref( $newdata_ref ) eq 'ARRAY' && ref( $newdata_ref->[0] ) eq 'ARRAY' ) { return 'node_titles'; } else { Carp::carp "\nUnrecognized data type"; print "\n"; return undef; } } sub _add_node_info { my ( $newdata_ref, $alldata_ref ) = @_; foreach my $src_ref ( @{ $newdata_ref } ) { my $node_id = $src_ref->{node_id}; my $dest_ref = \%{ $alldata_ref->{$node_id} }; my %data = ( lastupdate => $src_ref->{lastupdate}, nodetype => $src_ref->{nodetype}, root_node => $src_ref->{root_node}, title => $src_ref->{content}, createtime => $src_ref->{createtime}, node_id => $src_ref->{node_id}, author_user => $src_ref->{author_user}, author_name => $src_ref->{author_name}, parent_node => $src_ref->{parent_node} ); foreach my $key qw( root_node parent_node ) { if( not defined $data{$key} ) { # The root and parent nodes are undef if this node is # not a reply, so skip them. # We could set root and parent = $node_id in %data if # undef, instead (but it may not be expected behavior) delete $data{$key}; } } foreach my $key ( keys %data ) { if( exists $dest_ref->{$key} && $data{$key} ne $dest_ref->{$key} ) { _print_warning( $node_id, $key, $dest_ref->{$key}, $data{$key} ); } $dest_ref->{$key} = $data{$key}; } } } sub _add_node_titles { my ( $newdata_ref, $alldata_ref ) = @_; foreach my $src_ref ( @{ $newdata_ref } ) { my $node_id = $src_ref->[0]; my $title = $src_ref->[1]; my $dest_ref = \%{ $alldata_ref->{$node_id} }; if( exists $dest_ref->{title} && $title ne $dest_ref->{title} ) { _print_warning( $node_id, 'title', $dest_ref->{title}, $title ); } $dest_ref->{title} = $title; } } sub _add_node_content { my ( $newdata_ref, $alldata_ref ) = @_; # is the data in 'updated' in the same format as for 'lastupdate'? # retain the 'created' key (in a diff format than 'createtime') my $node_id = $newdata_ref->{id}; my %data = ( title => $newdata_ref->{title}, lastupdate => $newdata_ref->{updated}, created => $newdata_ref->{created}, content => $newdata_ref->{doctext}{content}, nodetype => $newdata_ref->{type}{content}, author_name => $newdata_ref->{author}{content}, author_user => $newdata_ref->{author}{id} ); my $dest_ref = \%{ $alldata_ref->{$node_id} }; foreach my $key ( keys %data ) { if( exists $dest_ref->{$key} && $data{$key} ne $dest_ref->{$key} ) { _print_warning( $node_id, $key, $dest_ref->{$key}, $data{$key} ); } $dest_ref->{$key} = $data{$key}; } } sub _add_user_nodes { my ( $newdata_ref, $alldata_ref ) = @_; my $author_name = $newdata_ref->{INFO}->[0]->{foruser}; while( my( $node_id, $noderef ) = each %{ $newdata_ref->{NODE} } ) { my %data = ( reputation => $noderef->{reputation}, created => $noderef->{createtime}, title => $noderef->{content}, lastupdate => $noderef->{lastupdate}, lastedit => $noderef->{lastedit} ); my $dest_ref = \%{ $alldata_ref->{$node_id} }; foreach my $key ( keys %data ) { if( exists $dest_ref->{$key} && $data{$key} ne $dest_ref->{$key} ) { _print_warning( $node_id, $key, $dest_ref->{$key}, $data{$key} ); } $dest_ref->{$key} = $data{$key}; } } } sub _print_warning { my ( $node_id, $key, $dest_val, $src_val ) = @_; print "Warning - data discrepancy for node ID $node_id:\n"; print " current $key = $dest_val\n"; print " new $key = $src_val\n"; print " The new data will replace the current data\n"; }