http://qs321.pair.com?node_id=605660
Category: Text Processing
Author/Contact Info SlackBladder
Description: Firstly, most of the code was donated by "Graff", however, I want to make it available to all as it works very well. This code connects to a SQL Server machine, extracts the data via a query and works out hierarchical data structures (if any exist) within a dataset of two columns of data. It's pretty fast (currently extracts over 7000 records from a SQL Server system and builds the hierarchy within around 3 to 4 seconds). I have replaced "company" specific data.
#!/usr/local/bin/perl -w

use strict;  # important for variables
use warnings;  # Gives optional warnings
use Win32::SqlServer qw(SCALAR);   # This is the module which is used 
+to connect to the RDBMS
use Tie::File;
# Create database connection object, set database and then connect wit
+h integrated security.
my $sqlsrv = Win32::SqlServer->new; # Create a new SQLSRV object
$sqlsrv->setloginproperty('Server', 'SERVERNAME'); # Set the Server pr
+operty
$sqlsrv->setloginproperty('Database', 'DATABASENAME'); # Set the datab
+ase property
$sqlsrv->connect(); # This is where we connect to the sql server syste
+m using integrated security
my $reportfile = "report.txt"; # this is where the report goes
my $resultfile = "resultset.txt";  # Store the results from the query 
+in here
my $hierarchy = "hierarchy.out"; # This is where we store the final re
+sults for the hierarchy
my $folder = "THIS_IS_A_FOLDER_NAME";
my $countrecords; my $finalcount; my $resultset; my $name; my $file; m
+y $addr; my @array; my %node; my @rows;
my @TimeArray = localtime(time);
my $year = sprintf ("%04d",$TimeArray[5]+1900);
my $monthasnum = sprintf ("%02d",$TimeArray[4]+1);
my $day =  sprintf ("%02d",$TimeArray[3]);
my $hour = sprintf ("%02d",$TimeArray[2]);
my $minute = sprintf ("%02d",$TimeArray[1]);
my $datestamp =  "$year"."_"."$monthasnum"."_"."$day"."_"."$hour" ."_"
+."$minute" ;

# Open up the reportfile, if this fails, die
open (REPORTFILE,">>$folder\\$reportfile") or die "Can not open file $
+folder\\$reportfile for writing, quitting\n";
open (RESULTFILE,">$folder\\$resultfile") or die "Can not open file $f
+older\\$resultfile for writing, quitting\n";


# Print some meaningful header information to the report
print REPORTFILE "TAG HIERARCHY REPORT\n";
print REPORTFILE "YEAR_MONTH_DAY_HOUR_MINUTE\n";
print REPORTFILE "$datestamp\n";


# Our SQL statement.  Do not alter this unless you understand the cons
+equences !!!
my $stmnt01 = <<SQLEND;
SELECT FunctionalLocationCode, SuperiorFunctionalLocation
FROM dbo.TblToFunctionalLocationMaster
where SuperiorFunctionalLocation like 'A1%' and (Objectstatuscode = 'O
+PMI' or Objectstatuscode = 'OPER')
SQLEND


# The result set file is still open from further up, don't need to ope
+n it.  Print the query results to the file
my $result = $sqlsrv->sql($stmnt01, SCALAR);
foreach $name (@$result) {
   print RESULTFILE "$name\n";
}

# Remove the very last line from the file as it contains a blank line
tie @rows, 'Tie::File', '$folder\\$resultfile' || die "Can't open: $!\
+n";
pop @rows;
untie @rows or die "Could not update $folder\\$resultfile\n $!";


# Do a quick count on the number of records in the file to be processe
+d and report
# back to the reportfile, do not need to open the file as is still ope
+n
# from previous operation
open(FILE, "< $folder\\$resultfile") or die "can't open $file: $!";
$countrecords++ while <FILE>;
# Print the contents of the counter to the report file.
print REPORTFILE "Number of records to be processed for this run is ..
+.$countrecords\n\n\n";

# close file handles
close (REPORTFILE);
close (RESULTFILE);


# Begin building the hierarchy
open(TH, ">$folder\\$hierarchy") or die "can't open $hierarchy: $!";  
+  # output file
open (DATA, "< $folder\\$resultfile") || die "could not open file: $!"
+; # input file
while (<DATA>) {
    chomp;
    my ( $c, $p ) = split /\|/;
        if ( $c eq $p ) {  # these are easy, so finish them first
        print TH;
        print TH "\n";
        next;
    }

    if ( exists( $node{$c}{child_of} )) {
        warn "$.: bad record: $c is child of both $p and $node{$c}{chi
+ld_of}\n";
        next;
    }
    $node{$c}{child_of} = $p;
    $node{$p}{parent_of}{$c} = undef;
}

# begin the sorted output by looping over values that do not have pare
+nts:

for my $parent ( grep { !exists( $node{$_}{child_of} ) } keys %node ) 
+{
    my $children = $node{$parent}{parent_of};  # ref to hash of child 
+values
    trace_down( $children, \%node );
}

sub trace_down
{
    my ( $kids, $tree ) = @_;
    for my $kid ( keys %$kids ) {
        print TH "$kid|$$tree{$kid}{child_of}\n";
        # print "$kid|$$tree{$kid}{child_of}\n";
        if ( exists( $$tree{$kid}{parent_of} )) {
            trace_down( $$tree{$kid}{parent_of}, $tree );
        }
    }
}

# Close all open file handles, this is good coding practice.
close (REPORTFILE);
close (RESULTFILE);
close (TH);
close (DATA);