Building a data Hierarchy

Category:	Text Processing
Author/Contact Info	SlackBladder
Description:	Firstly, most of the code was donated by "Graff", however, I want to make it available to all as it works very well. This code connects to a SQL Server machine, extracts the data via a query and works out hierarchical data structures (if any exist) within a dataset of two columns of data. It's pretty fast (currently extracts over 7000 records from a SQL Server system and builds the hierarchy within around 3 to 4 seconds). I have replaced "company" specific data.
#!/usr/local/bin/perl -w use strict; # important for variables use warnings; # Gives optional warnings use Win32::SqlServer qw(SCALAR); # This is the module which is used +to connect to the RDBMS use Tie::File; # Create database connection object, set database and then connect wit +h integrated security. my $sqlsrv = Win32::SqlServer->new; # Create a new SQLSRV object $sqlsrv->setloginproperty('Server', 'SERVERNAME'); # Set the Server pr +operty $sqlsrv->setloginproperty('Database', 'DATABASENAME'); # Set the datab +ase property $sqlsrv->connect(); # This is where we connect to the sql server syste +m using integrated security my $reportfile = "report.txt"; # this is where the report goes my $resultfile = "resultset.txt"; # Store the results from the query +in here my $hierarchy = "hierarchy.out"; # This is where we store the final re +sults for the hierarchy my $folder = "THIS_IS_A_FOLDER_NAME"; my $countrecords; my $finalcount; my $resultset; my $name; my $file; m +y $addr; my @array; my %node; my @rows; my @TimeArray = localtime(time); my $year = sprintf ("%04d",$TimeArray[5]+1900); my $monthasnum = sprintf ("%02d",$TimeArray[4]+1); my $day = sprintf ("%02d",$TimeArray[3]); my $hour = sprintf ("%02d",$TimeArray[2]); my $minute = sprintf ("%02d",$TimeArray[1]); my $datestamp = "$year"."_"."$monthasnum"."_"."$day"."_"."$hour" ."_" +."$minute" ; # Open up the reportfile, if this fails, die open (REPORTFILE,">>$folder\\$reportfile") or die "Can not open file $ +folder\\$reportfile for writing, quitting\n"; open (RESULTFILE,">$folder\\$resultfile") or die "Can not open file $f +older\\$resultfile for writing, quitting\n"; # Print some meaningful header information to the report print REPORTFILE "TAG HIERARCHY REPORT\n"; print REPORTFILE "YEAR_MONTH_DAY_HOUR_MINUTE\n"; print REPORTFILE "$datestamp\n"; # Our SQL statement. Do not alter this unless you understand the cons +equences !!! my $stmnt01 = <<SQLEND; SELECT FunctionalLocationCode, SuperiorFunctionalLocation FROM dbo.TblToFunctionalLocationMaster where SuperiorFunctionalLocation like 'A1%' and (Objectstatuscode = 'O +PMI' or Objectstatuscode = 'OPER') SQLEND # The result set file is still open from further up, don't need to ope +n it. Print the query results to the file my $result = $sqlsrv->sql($stmnt01, SCALAR); foreach $name (@$result) { print RESULTFILE "$name\n"; } # Remove the very last line from the file as it contains a blank line tie @rows, 'Tie::File', '$folder\\$resultfile' \|\| die "Can't open: $!\ +n"; pop @rows; untie @rows or die "Could not update $folder\\$resultfile\n $!"; # Do a quick count on the number of records in the file to be processe +d and report # back to the reportfile, do not need to open the file as is still ope +n # from previous operation open(FILE, "< $folder\\$resultfile") or die "can't open $file: $!"; $countrecords++ while <FILE>; # Print the contents of the counter to the report file. print REPORTFILE "Number of records to be processed for this run is .. +.$countrecords\n\n\n"; # close file handles close (REPORTFILE); close (RESULTFILE); # Begin building the hierarchy open(TH, ">$folder\\$hierarchy") or die "can't open $hierarchy: $!"; + # output file open (DATA, "< $folder\\$resultfile") \|\| die "could not open file: $!" +; # input file while (<DATA>) { chomp; my ( $c, $p ) = split /\\|/; if ( $c eq $p ) { # these are easy, so finish them first print TH; print TH "\n"; next; } if ( exists( $node{$c}{child_of} )) { warn "$.: bad record: $c is child of both $p and $node{$c}{chi +ld_of}\n"; next; } $node{$c}{child_of} = $p; $node{$p}{parent_of}{$c} = undef; } # begin the sorted output by looping over values that do not have pare +nts: for my $parent ( grep { !exists( $node{$_}{child_of} ) } keys %node ) +{ my $children = $node{$parent}{parent_of}; # ref to hash of child +values trace_down( $children, \%node ); } sub trace_down { my ( $kids, $tree ) = @_; for my $kid ( keys %$kids ) { print TH "$kid\|$$tree{$kid}{child_of}\n"; # print "$kid\|$$tree{$kid}{child_of}\n"; if ( exists( $$tree{$kid}{parent_of} )) { trace_down( $$tree{$kid}{parent_of}, $tree ); } } } # Close all open file handles, this is good coding practice. close (REPORTFILE); close (RESULTFILE); close (TH); close (DATA);

Back to Code Catacombs