#!/usr/bin/perl
use strict; # always use this
use warnings;
use URI::Escape;
use XML::LibXML;
use Data::Dump 'pp';
# Parent-account settings handed to both the parser and the XML writer.
my %parent = (
    ParentAcct  => '5403020',
    feedCount   => 1,
    feedNumbers => 1,
);

# Build a lookup set (account id => 1) of the CPA accounts.
my @cpaAccts = qw(402 1888 2379 5316 12968 24379 25101 25518);
my %cpaHash;
@cpaHash{@cpaAccts} = (1) x @cpaAccts;
pp \%cpaHash;

# Gather the metadata record of every feed file, keyed by file name.
my %meta;
#my @feedFiles = <*.gz>;
my @feedFiles = ('retailer-2247.txt.gz');
foreach my $feed (@feedFiles) {
    parse_file($feed, \%meta, \%parent);
}
pp \%meta;

## Load the tax file into a hash ref
my $taxFile = "taxinfo.txt";
my $taxHash = readTax($taxFile);
pp $taxHash;

# Combine everything and write the XML metadata file
my $mdFile = "meta.xml";
writeMetaDataFile($mdFile, \%meta, \%parent, $taxHash);
# Parse one feed's metadata row into a record and store it in the caller's
# hash under the feed file name.
#
# Arguments:
#   $filename - feed file name; becomes the key in $href and the record's
#               'feedname'
#   $href     - hash ref that receives the record
#   $parent   - hash ref holding the keys ParentAcct, feedCount, feedNumbers
#
# Returns the record (hash ref) that was stored.
#
# The zcat pipe is still commented out, so the header and the data row are
# read from the DATA handle rather than from $filename. The CPA/CPC decision
# uses the file-level %cpaHash lookup.
sub parse_file {
    my ($filename, $href, $parent) = @_;

    # open IN, sprintf("zcat %s |", $filename)
    # or die "Could not open pipe for $filename : $!";
    my $header = <DATA>;    # IN; the header row is consumed and ignored
    my $row    = <DATA>;
    $row = '' unless defined $row;    # handle exhausted: treat as empty row
    $row =~ s/[\r\n]+\z//;            # keep the line ending out of the last field
    my @f = split /[,]/, $row;        # change to \t
    #for (0..$#f){ print "$_ $f[$_]\n" };
    # close IN;

    # cleanup -- split drops trailing empty fields, so a short row leaves the
    # later columns undefined; only touch the columns that are present.
    my $mid      = $f[0];
    my $merchant = $f[4];
    $merchant =~ s/_//g if defined $merchant;

    my $site;
    if (defined $f[18]) {
        (my $raw = $f[18]) =~ s/[\r\n]//g;
        my $url = uri_unescape($raw);
        # The greedy .* makes this capture scheme + host of the LAST
        # http(s) URL embedded in the value.
        ($site) = $url =~ /.*(http[s]?:\/\/[^\/\?&]+)/;
    }

    # create record
    my $rec = {
        feedname => $filename,
        mid      => $mid,
        type     => ( defined $mid && $cpaHash{$mid} ) ? 'CPA' : 'CPC',
        merchant => $merchant,
        logo     => $f[5],
        url      => $site,
        # The lookup keys must match the spelling used in %parent
        # (ParentAcct / feedCount / feedNumbers); the record's own key
        # names are kept as they were.
        parentAccount => $parent->{ParentAcct},
        feedCount     => $parent->{feedCount},
        feedNumber    => $parent->{feedNumbers},
    };

    # add record to hash
    $href->{$filename} = $rec;
    return $rec;
}
## Read the tax file into a hash.
##
## Argument:
##   $taxFile - path of a whitespace-separated text file whose lines are
##              "<sid> <state> <rate>"; any line containing "Retid" is
##              treated as a header and skipped, as are blank lines.
##
## Returns a hash ref of the form
##   { $sid => { state => [ ... ], rate => [ ... ] } }
## where the two arrays grow in step, one entry per input line for that sid.
##
## Dies if the file cannot be opened.
sub readTax {
    my ($taxFile) = @_;
    my %sidHash;

    # Lexical handle: scoped to this sub instead of the global IN bareword.
    open my $in, '<', $taxFile
        or die "$taxFile couldn't be opened : $!\n";

    while (my $line = <$in>) {
        chomp $line;
        next if $line =~ /Retid/;
        next unless $line =~ /\S/;    # a blank line would create a '' entry

        # split ' ' also ignores leading whitespace, so an indented line
        # does not yield an empty first field.
        my ($sid, $state, $rate) = split ' ', $line;
        push @{ $sidHash{$sid}{state} }, $state;
        push @{ $sidHash{$sid}{rate} },  $rate;
    }
    close $in;

    return \%sidHash;
}
## Build the <merchants> XML document and write it out.
##
## Arguments:
##   $xmlfile     - path of the XML file to (over)write
##   $tmpMetaHash - hash ref of merchant records; each record supplies
##                  mid, merchant, url, type and logo
##   $parent      - hash ref supplying ParentAcct, feedCount and feedNumbers
##   $taxHash     - hash ref keyed by merchant id; the 'state' array of the
##                  matching entry is written as a comma-separated list
##
## The serialised document is printed to STDOUT and written to $xmlfile.
## Dies if the file cannot be opened or closed. Returns 1 on success.
sub writeMetaDataFile {
    my ($xmlfile, $tmpMetaHash, $parent, $taxHash) = @_;

    my $doc  = XML::LibXML::Document->new();
    my $root = $doc->createElement('merchants');
    $doc->setDocumentElement($root);

    # Sorted keys give a stable merchant order in the output.
    foreach my $k (sort keys %$tmpMetaHash) {
        my $rec = $tmpMetaHash->{$k};
        my $id  = $rec->{'mid'};

        my $merchant = $doc->createElement('merchant');
        $merchant->setAttribute('id', $id);
        $root->appendChild($merchant);

        $merchant->appendTextChild('name',         $rec->{merchant});
        $merchant->appendTextChild('url',          $rec->{url});
        $merchant->appendTextChild('type',         $rec->{type});
        $merchant->appendTextChild('logoUrl',      $rec->{logo});
        $merchant->appendTextChild('ntParentAcct', $parent->{ParentAcct});
        $merchant->appendTextChild('feedCount',    $parent->{feedCount});
        $merchant->appendTextChild('feedNumbers',  $parent->{feedNumbers});

        # A merchant without a tax entry gets an empty StateTax element.
        # Checking with exists first avoids dereferencing undef (fatal
        # under strict refs) and avoids autovivifying a key in the
        # caller's hash.
        my $states;
        if (defined $id && $taxHash && exists $taxHash->{$id}) {
            $states = $taxHash->{$id}{state};
        }
        my $state_list = $states ? join(',', @$states) : '';
        $merchant->appendTextChild('StateTax', $state_list);
    }

    # Serialise once; the same text goes to STDOUT and to the file.
    my $xml = $doc->toString(1);
    print $xml;

    open my $out, '>', $xmlfile
        or die "Could not open $xmlfile for Writing : $!";
    print {$out} $xml;
    # Buffered write errors only surface at close, so check it.
    close $out
        or die "Could not close $xmlfile : $!";
    # push @ftpFiles, $xmlFile;
    return 1;
}
__DATA__
header
2247,,,,1_STOP_lighting,http://cfsi.pgcdn.com/images/retbutton_2247.gif,,,,,,,,,,,,,http://tracking.searchmarketing.com