#!/usr/bin/perl
#
# Collects per-feed merchant metadata from the gzipped feed files in the
# current directory, loads a tax table from taxinfo.txt, and prints the
# collected metadata with Data::Dumper.  A helper that serialises the
# metadata to an XML file ($mdFile) is defined as well.
#
# NOTE(review): in the code visible here, writeMetaDataFile() is never
# called and %midHash (the tax table) is never consumed.
#
# ---------------------------------------------------------------------------
# Sample data that preceded the script text in the original source.  The
# content is reproduced unchanged (only line breaks were added); the labels
# are a reviewer's reading of it -- TODO confirm.
#
# (1) Looks like the Dumper(\%metaHash) output for one feed file:
#
#   $VAR1 = {
#     'retailer-2247.txt.gz' => {
#       'logo' => 'http://cfsi.pgcdn.com/images/retbutton_2247.gif',
#       'mid' => '2247',
#       'feedCount' => undef,
#       'feedNumbers' => undef,
#       'url' => 'http://tracking.searchmarketing.com',
#       'merchant' => '1STOPlighting',
#       'type' => 'CPC',
#       'parentAccount' => undef,
#       'feedName' => 'retailer-2247.txt.gz'
#     }
#   }
#
# (2) Looks like the hash produced by readTax():
#
#   {
#     '19428' => { 'rate' => [ '7.000' ],          'state' => [ 'NJ' ] },
#     '2247'  => { 'rate' => [ '9.750', '7.000' ], 'state' => [ 'IL', 'IN' ] }
#   }
#
# (3) A flat record; presumably the text content of one <merchant> element
#     with its markup lost:
#
#   1STOPlighting http://tracking.searchmarketing.com CPC http://cfsi.pgcdn.com/images/retbutton_2247.gif 5403020 1 1 IL, IN
# ---------------------------------------------------------------------------

use Net::SFTP::Foreign;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use URI::Escape;
use Net::FTP;
use XML::LibXML;
use Data::Dumper;

# strict/warnings are switched on inside each sub below instead of file-wide:
# code later in this file (not visible during this review) may still rely on
# undeclared package globals, and a file-level "use strict" would break it.
# The "our" declarations name the very same package variables the original
# script used implicitly, so any other code sharing them keeps working.
our $mdFile   = "meta.xml";
my @feedFiles = glob('*.gz');
our $fileRef  = \@feedFiles;
our @cpaAccts = ("402", "1888", "2379", "5316", "12968", "24379", "25101", "25518");

our (%metaHash, %cpaHash, @ftpFiles);
our ($parentAccount, $parentAcct, $feedCount, $feedNumbers);

my $taxFile = "taxinfo.txt";

## Getting hash of taxFile
my %midHash = readTax($taxFile);

## Getting hash of metaData
writeXml(@$fileRef);
print Dumper(\%metaHash);

## subroutine to get the hash of TaxFile
#
# Reads a whitespace-separated file whose data lines hold three fields
# (id, state, rate).  Any line containing "Retid" is skipped, as are
# blank lines.
#
# Params : $taxFile - path of the file to read
# Returns: flat hash (list)  id => { state => [ ... ], rate => [ ... ] },
#          values appended in file order
# Dies   : when the file cannot be opened
sub readTax {
    use strict;
    use warnings;

    my ($taxFile) = @_;
    my %sidHash = ();

    # $! (OS error) is what describes a failed open; $@ only holds the
    # error of the last eval.
    open(my $in, '<', $taxFile)
        or die "$taxFile couldn't be opened $!\n";

    while (my $line = <$in>) {
        chomp $line;
        next if $line =~ /Retid/;

        # split ' ' ignores leading whitespace, so an indented line does
        # not shift the fields by one.
        my ($sid, $state, $rate) = split ' ', $line;
        next unless defined $sid;    # blank line

        push @{ $sidHash{$sid}{state} }, $state;
        push @{ $sidHash{$sid}{rate} },  $rate;
    }
    close $in;

    return %sidHash;
}

## subroutine to get the hash of feeds pgmetadata.xml
#
# For every feed file given, decompresses it through zcat, skips the first
# line and fills %metaHash{<file>} from the tab-separated second line:
#   field 0  -> mid (and type 'CPA' when listed in @cpaAccts, else 'CPC')
#   field 4  -> merchant (underscores replaced by spaces)
#   field 5  -> logo
#   field 18 -> url (URI-unescaped, reduced to scheme://host of the last
#               http/https occurrence)
# parentAccount / feedCount / feedNumbers are copied from the package
# globals of the same name.
#
# Params : list of gzipped feed file names
# Returns: %metaHash (flat list); the package hash is updated in place
# Dies   : when the zcat pipe cannot be started
sub writeXml {
    use strict;
    use warnings;

    my @feeds = @_;

    # Read the cpa accts into a hash
    %cpaHash = map { $_ => 1 } @cpaAccts;

    foreach my $f (@feeds) {
        # List-form pipe open: the file name never passes through a shell.
        open(my $in, '-|', 'zcat', $f)
            or die "Could not open pipe for $f : $!";

        while (my $line = <$in>) {
            next if $. == 1;    # first line is skipped, only line 2 is used

            chomp $line;
            my @feedLine = split /\t/, $line;

            my $mid  = $feedLine[0];
            my $type = (defined $mid && $cpaHash{$mid}) ? 'CPA' : 'CPC';

            my $merchant = $feedLine[4];
            $merchant =~ s/_/ /g if defined $merchant;

            my $url;
            if (defined $feedLine[18]) {
                my $tempUrl = uri_unescape($feedLine[18]);
                ($url) = $tempUrl =~ /.*(http[s]?:\/\/[^\/\?&]+)/;
            }

            $metaHash{$f}{feedName}      = $f;
            $metaHash{$f}{mid}           = $mid;
            $metaHash{$f}{type}          = $type;
            $metaHash{$f}{merchant}      = $merchant;
            $metaHash{$f}{logo}          = $feedLine[5];
            $metaHash{$f}{url}           = $url;
            $metaHash{$f}{parentAccount} = $parentAccount;
            $metaHash{$f}{feedCount}     = $feedCount;
            $metaHash{$f}{feedNumbers}   = $feedNumbers;
            last;
        }

        # We stop reading after line 2, so zcat may exit on a broken pipe;
        # the close status is deliberately not checked (the original did
        # not check it either).
        close $in;
    }

    return %metaHash;
}

## Subroutine to create XML file
#
# Writes a <merchants> document with one <merchant id="..."> element per
# entry (sorted by hash key) to $mdFile and appends $mdFile to @ftpFiles.
#
# ntParentAcct / feedCount / feedNumbers use the value stored in the entry
# itself (as filled in by writeXml) and fall back to the package globals
# $parentAcct / $feedCount / $feedNumbers that the original code wrote
# unconditionally.
#
# Params : %tmpMetaHash - hash shaped like %metaHash
# Dies   : when $mdFile cannot be opened, written or closed
sub writeMetaDataFile {
    use strict;
    use warnings;

    my (%tmpMetaHash) = @_;

    open(my $out, '>', $mdFile)
        or die "Could not open $mdFile for Writing: $!";

    my $doc  = XML::LibXML::Document->new();
    my $root = $doc->createElement('merchants');
    $doc->setDocumentElement($root);

    foreach my $k (sort keys %tmpMetaHash) {
        my $entry = $tmpMetaHash{$k};

        my $parent  = defined $entry->{parentAccount} ? $entry->{parentAccount} : $parentAcct;
        my $count   = defined $entry->{feedCount}     ? $entry->{feedCount}     : $feedCount;
        my $numbers = defined $entry->{feedNumbers}   ? $entry->{feedNumbers}   : $feedNumbers;

        my $merchant = $doc->createElement('merchant');
        $merchant->setAttribute('id', $entry->{mid});
        $root->appendChild($merchant);

        $merchant->appendTextChild('name',         $entry->{merchant});
        $merchant->appendTextChild('url',          $entry->{url});
        $merchant->appendTextChild('type',         $entry->{type});
        $merchant->appendTextChild('logoUrl',      $entry->{logo});
        $merchant->appendTextChild('ntParentAcct', $parent);
        $merchant->appendTextChild('feedCount',    $count);
        $merchant->appendTextChild('feedNumbers',  $numbers);
    }

    print {$out} $doc->toString(1)
        or die "Could not write to $mdFile: $!";
    # Buffered write errors only surface at close, so check it.
    close $out
        or die "Could not close $mdFile: $!";

    push @ftpFiles, $mdFile;
}