Beefy Boxes and Bandwidth Generously Provided by pair Networks
Clear questions and runnable code
get the best and fastest answer
 
PerlMonks  

comment on

( #3333=superdoc: print w/replies, xml ) Need Help??
    0: #!/usr/bin/perl -w
    1: use strict;
    2: 
    3: ## recursively comparing arbitrary heterogenous data structures
    4: ## an experiment in functional programming idioms implemented in perl
    5: ##
    6: ## by anders pearson <anders@columbia.edu>
    7: ## 2001-07-06
    8: ##
    9: ## functions to traverse two arbitrary complex data structures
    10: ## (lists of lists, lists of hashes, lists of hashes of lists and scalars,
    11: ## etc, etc) comparing them by value. 
    12: ## 
    13: ## known issues: 
    14: ##   - does not deal with GLOB,CODE,LVALUE or other more exotic types
    15: ##   - makes no provision for avoiding circular references.
    16: ##     ie, it WILL get stuck on them
    17: ##
    18: ## enjoy and let me know if you see any problems or can think of
    19: ## better ways to do anything.
    20: 
    21: ########### driver functions
    22: 
    23: # takes references to two data structures and returns
    24: # 1 if they are different, 0 if they're the same
    25: # order agnostic (ie ['foo','bar'] == ['bar','foo'])
    26: 
    27: sub diff {
    28:     my $r1 = shift;
    29:     my $r2 = shift;
    30:     # ld expects references to lists
    31:     if ("ARRAY" eq ref $r1 && "ARRAY" eq ref $r2) {
    32:         return &ld("","",$r1,$r2,0,1);
    33:     } else {
    34: 	# if they're not references to lists, we just make them
    35:         return &ld("","",[$r1],[$r2],0,1);
    36:     }
    37: }
    38: 
    39: # same as diff but not order agnostic
    40: # ['foo','bar'] != ['bar','foo']
    41: sub diff_order {
    42:     my $r1 = shift;
    43:     my $r2 = shift;
    44:     # ld expects references to lists
    45:     if ("ARRAY" eq ref $r1 && "ARRAY" eq ref $r2) {
    46:         return &ld("","",$r1,$r2,0,0);
    47:     } else {
    48: 	# if they're not references to arrays, we just make them
    49:         return &ld("","",[$r1],[$r2],0,0);
    50:     }
    51: }   
    52: 
    53: # recursively compares two lists by value
    54: # works for damn near any reasonably complex structure
    55: # including: lists of scalars, lists of lists, lists of hashes, 
    56: # lists of hashes of lists of arrays of scalars, etc, etc.
    57: # arguably should be called data_structures_diff
    58: # argument $order == 1 means that we don't care about the order
    59: # ie ['foo','bar'] == ['bar','foo']
    60: 
    61: sub ld {
    62:     my $x      = shift;       # first element of first list
    63:     my $y      = shift;       # first element of second list
    64:     my $r1     = shift;       # reference to rest of first list
    65:     my $r2     = shift;       # reference to rest of second list
    66:     my $sorted = shift;       # whether or not the lists have been sorted
    67:     my $order  = shift;       # whether we're order agnostic with lists
    68: 
    69:     my $DIFFERENT = 1;
    70:     my $SAME      = 0;
    71: 
    72:     my @xs = @$r1;
    73:     my @ys = @$r2;
    74: 
    75:     if(!$sorted && $order) {
    76: 	@xs = sort @xs;
    77: 	@ys = sort @ys;
    78: 	$sorted = 1;
    79:     }
    80: 
    81:     if ($#xs != $#ys) {
    82: 	# lists are different lengths, so we know right off that
    83: 	# they must not be the same.
    84: 	return $DIFFERENT;
    85:     } else {
    86: 
    87: 	# lists are the same length, so we compare $x and $y
    88: 	# based on what they are
    89: 	if (!ref $x) {
    90: 
    91: 	    # make sure $y isn't a reference either
    92: 	    return $DIFFERENT if ref $y;
    93: 
    94: 	    # both scalars, compare them
    95: 	    return $DIFFERENT if $x ne $y;
    96: 	} else {
    97: 
    98: 	    # we're dealing with references now
    99: 	    if (ref $x ne ref $y) {
    100: 
    101: 		# they're entirely different data types
    102: 		return $DIFFERENT;
    103: 	    } elsif ("SCALAR" eq ref $x) {
    104: 
    105: 		# some values that we can actually compare
    106: 		return $DIFFERENT if $$x ne $$y;
    107: 	    } elsif ("REF" eq ref $x) {
    108: 
    109: 		# yes, we even handle references to references to references...
    110: 		return $DIFFERENT if &ld($$x,$$y,[],[],0,$order);
    111: 	    } elsif ("HASH" eq ref $x) {
    112: 
    113: 		# references to hashes are a little tricky
    114: 		# we make arrays of keys and values (keeping
    115: 		# the values in order relative to the keys)
    116: 		# and compare those.
    117: 		my @kx = sort keys %$x;
    118: 		my @ky = sort keys %$y;
    119: 		my @vx = map {$$x{$_}} @kx;
    120: 		my @vy = map {$$y{$_}} @ky;
    121: 		return $DIFFERENT
    122: 		    if &ld("", "", \@kx,\@ky,1,$order) || 
    123: 			&ld("", "", \@vx,\@vy,1,$order);
    124: 	    } elsif ("ARRAY" eq ref $x) {
    125: 		return $DIFFERENT if &ld("","",$x,$y,0,$order);
    126: 	    } else {
    127: 		# don't know how to compare anything else
    128: 		die "sorry, can't compare type ", ref $x;
    129: 	    }
    130: 	}
    131: 	if (-1 == $#xs) {
    132: 
    133: 	    # no elements left in list, this is the base case.
    134: 	    return $SAME;
    135: 	} else {
    136: 	    return &ld(shift @xs,shift @ys,\@xs,\@ys,$sorted,$order);
    137: 	}
    138: 
    139:     }
    140: }
    141: 
    142: # some simple examples
    143: my @l1 = qw/foo bar baz/;
    144: my @l2 = qw/bar foo baz/;
    145: 
    146: print "d:  ", &diff(\@l1,\@l2), "\n";
    147: print "do: ", &diff_order(\@l1,@l2), "\n";
    148: push @l1, {x => 'y'};
    149: print "d:  ", &diff(\@l1,\@l2), "\n";
    150: print "do: ", &diff_order(\@l1,@l2), "\n";
    151: push @l2, {x => 'y'};
    152: print "d:  ", &diff(\@l1,\@l2), "\n";
    153: print "do: ", &diff_order(\@l1,@l2), "\n";
    154: push @l1, [1,2,3];
    155: push @l2, [3,2,1];
    156: print "d:  ", &diff(\@l1,\@l2), "\n";
    157: print "do: ", &diff_order(\@l1,@l2), "\n";
    158: 
    159: __END__
    

In reply to recursively comparing heterogenous data structures by thraxil

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others chanting in the Monastery: (3)
As of 2022-05-22 20:25 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    Do you prefer to work remotely?



    Results (81 votes). Check out past polls.

    Notices?