1: #!/usr/bin/perl -w
2: use strict;
3:
4: ## recursively comparing arbitrary heterogenous data structures
5: ## an experiment in functional programming idioms implemented in perl
6: ##
7: ## by anders pearson <anders@columbia.edu>
8: ## 2001-07-06
9: ##
10: ## functions to traverse two arbitrary complex data structures
11: ## (lists of lists, lists of hashes, lists of hashes of lists and scalars,
12: ## etc, etc) comparing them by value.
13: ##
14: ## known issues:
15: ## - does not deal with GLOB,CODE,LVALUE or other more exotic types
16: ## - makes no provision for avoiding circular references.
17: ## ie, it WILL get stuck on them
18: ##
19: ## enjoy and let me know if you see any problems or can think of
20: ## better ways to do anything.
21:
22: ########### driver functions
23:
24: # takes references to two data structures and returns
25: # 1 if they are different, 0 if they're the same
26: # order agnostic (ie ['foo','bar'] == ['bar','foo'])
27:
28: sub diff {
29: my $r1 = shift;
30: my $r2 = shift;
31: # ld expects references to lists
32: if ("ARRAY" eq ref $r1 && "ARRAY" eq ref $r2) {
33: return &ld("","",$r1,$r2,0,1);
34: } else {
35: # if they're not references to lists, we just make them
36: return &ld("","",[$r1],[$r2],0,1);
37: }
38: }
39:
40: # same as diff but not order agnostic
41: # ['foo','bar'] != ['bar','foo']
42: sub diff_order {
43: my $r1 = shift;
44: my $r2 = shift;
45: # ld expects references to lists
46: if ("ARRAY" eq ref $r1 && "ARRAY" eq ref $r2) {
47: return &ld("","",$r1,$r2,0,0);
48: } else {
49: # if they're not references to arrays, we just make them
50: return &ld("","",[$r1],[$r2],0,0);
51: }
52: }
53:
54: # recursively compares two lists by value
55: # works for damn near any reasonably complex structure
56: # including: lists of scalars, lists of lists, lists of hashes,
57: # lists of hashes of lists of arrays of scalars, etc, etc.
58: # arguably should be called data_structures_diff
59: # argument $order == 1 means that we don't care about the order
60: # ie ['foo','bar'] == ['bar','foo']
61:
62: sub ld {
63: my $x = shift; # first element of first list
64: my $y = shift; # first element of second list
65: my $r1 = shift; # reference to rest of first list
66: my $r2 = shift; # reference to rest of second list
67: my $sorted = shift; # whether or not the lists have been sorted
68: my $order = shift; # whether we're order agnostic with lists
69:
70: my $DIFFERENT = 1;
71: my $SAME = 0;
72:
73: my @xs = @$r1;
74: my @ys = @$r2;
75:
76: if(!$sorted && $order) {
77: @xs = sort @xs;
78: @ys = sort @ys;
79: $sorted = 1;
80: }
81:
82: if ($#xs != $#ys) {
83: # lists are different lengths, so we know right off that
84: # they must not be the same.
85: return $DIFFERENT;
86: } else {
87:
88: # lists are the same length, so we compare $x and $y
89: # based on what they are
90: if (!ref $x) {
91:
92: # make sure $y isn't a reference either
93: return $DIFFERENT if ref $y;
94:
95: # both scalars, compare them
96: return $DIFFERENT if $x ne $y;
97: } else {
98:
99: # we're dealing with references now
100: if (ref $x ne ref $y) {
101:
102: # they're entirely different data types
103: return $DIFFERENT;
104: } elsif ("SCALAR" eq ref $x) {
105:
106: # some values that we can actually compare
107: return $DIFFERENT if $$x ne $$y;
108: } elsif ("REF" eq ref $x) {
109:
110: # yes, we even handle references to references to references...
111: return $DIFFERENT if &ld($$x,$$y,[],[],0,$order);
112: } elsif ("HASH" eq ref $x) {
113:
114: # references to hashes are a little tricky
115: # we make arrays of keys and values (keeping
116: # the values in order relative to the keys)
117: # and compare those.
118: my @kx = sort keys %$x;
119: my @ky = sort keys %$y;
120: my @vx = map {$$x{$_}} @kx;
121: my @vy = map {$$y{$_}} @ky;
122: return $DIFFERENT
123: if &ld("", "", \@kx,\@ky,1,$order) ||
124: &ld("", "", \@vx,\@vy,1,$order);
125: } elsif ("ARRAY" eq ref $x) {
126: return $DIFFERENT if &ld("","",$x,$y,0,$order);
127: } else {
128: # don't know how to compare anything else
129: die "sorry, can't compare type ", ref $x;
130: }
131: }
132: if (-1 == $#xs) {
133:
134: # no elements left in list, this is the base case.
135: return $SAME;
136: } else {
137: return &ld(shift @xs,shift @ys,\@xs,\@ys,$sorted,$order);
138: }
139:
140: }
141: }
142:
143: # some simple examples
144: my @l1 = qw/foo bar baz/;
145: my @l2 = qw/bar foo baz/;
146:
147: print "d: ", &diff(\@l1,\@l2), "\n";
148: print "do: ", &diff_order(\@l1,@l2), "\n";
149: push @l1, {x => 'y'};
150: print "d: ", &diff(\@l1,\@l2), "\n";
151: print "do: ", &diff_order(\@l1,@l2), "\n";
152: push @l2, {x => 'y'};
153: print "d: ", &diff(\@l1,\@l2), "\n";
154: print "do: ", &diff_order(\@l1,@l2), "\n";
155: push @l1, [1,2,3];
156: push @l2, [3,2,1];
157: print "d: ", &diff(\@l1,\@l2), "\n";
158: print "do: ", &diff_order(\@l1,@l2), "\n";
159:
160: __END__