http://qs321.pair.com?node_id=657620
Category: Text Processing
Author/Contact Info aff at cpan dot org
Description: Local Shared Object (LSO), sometimes known as flash cookies, is a cookie-like data entity used by Adobe Flash Player. LSOs are stored as files on the local file system with the .sol extension. This module reads a Local Shared Object file and return content as a list.
package Sol::Parser;

use strict;
use warnings;

use Log::Log4perl;
use Pod::Usage;
use Readonly;

Readonly my $LENGTH_OF_FLOAT   => 8;
Readonly my $LENGTH_OF_INTEGER => 2;
Readonly my $LENGTH_OF_LONG    => 4;
Readonly my $END_OF_OBJECT     => "\x00\x00\x09";

my $conf = q(
  log4perl.category.sol.parser             = WARN, ScreenAppender
  log4perl.appender.ScreenAppender         = Log::Log4perl::Appender::
+Screen
  log4perl.appender.ScreenAppender.stderr  = 0
  log4perl.appender.ScreenAppender.layout  = PatternLayout
  log4perl.appender.ScreenAppender.layout.ConversionPattern=[%p] %d %M
+:%L  %m%n
);
Log::Log4perl::init( \$conf );
my $log  = Log::Log4perl::->get_logger(q(sol.parser));

my $file = undef;
my $FH   = undef;

my %datatype = (
                0x0 => 'number',
                0x1 => 'boolean',
                0x2 => 'string',
                0x3 => 'object',
                0x5 => 'null',
                0x6 => 'undefined',
                0x8 => 'array',
                0xa => 'raw-array',
                0xb => 'object-date',
                0xd => 'object-string-number-boolean-textformat',
                0xf => 'object-xml',
                0x10 => 'object-customclass',
               );

# Return type, value in list context.
sub getTypeAndValue {

  $log->logdie("expected to be called in LIST context") if !wantarray(
+);

  # Read data type
  my $value = undef;
  my $type = getBytes(1);
  my $type_as_txt = $datatype{$type};
  if (!exists($datatype{$type})) {
    $log->warn(qq{Missing datatype for '$type'!}) if $log->is_warn();
  }

  # Read element depending on type
  if($type == 0) {
    $value =  getFloat();
  } elsif($type == 1){
    $value =  getBytes(1);
  } elsif ($type == 2) {
    $value =  getString();
  } elsif($type == 3){
    $value =  getObject();
  } elsif($type == 5) {   # null
    $value = undef;
  } elsif($type == 6) {   # undef
    $value = undef;
  } elsif($type == 8){    # array
    $value = getArray();
  } elsif($type == 0xb){  # date
    $log->logdie("Not implemented yet: date");
  } elsif($type == 0xf){  # doublestring
    $log->logdie("Not implemented yet: doublestring");
  } elsif($type == 0x10){ # customclass
    $value = getObject(1);
  } else {
    $log->logdie("Unknown type:$type" );
  }

  return ($type_as_txt, $value);
}

# Return object - if customClass argument is given then read two
# strings instead of one.
sub getObject {
  my $customClass = shift;
  my @retvals = ();
  while (eof($FH) != 1) {
    # Read until end flag is detected : 00 00 09
    if (getraw(3) eq $END_OF_OBJECT) {
      return join(q{,}, @retvals);
    }

    # "un-read" the 3 bytes
    seek($FH, -3, 1) or $log->logdie("seek failed");

    # Read name
    my $name = getString();
    $log->debug(qq{name:$name}) if $log->is_debug();

    # Read 2nd name if customClass is set
    if ($customClass) {
      push @retvals, q{class_name=} . $name . q{;};
      my $name = getString();
      $log->debug(qq{name:$name (2nd name - customClass)}) if $log->is
+_debug();
      $customClass = 0;
    }

    # Get data type and value
    my ($type, $value) = getTypeAndValue();
    $log->debug(qq{type:$type value:$value}) if $log->is_debug();

    push @retvals, $name . q{;} . $value;
  }
  $log->logdie("Syntax error: reached end-of-file before end-of-object
+");
}

# Return array (list)
sub getArray {
  my @retvals = ();
  my $count = getlong();
  if($count == 0) {
    return getObject();
  }

 ELEMENT:
  while ($count-- > 0) {
    my $name = getString();

    if (!defined($name)) {
      last ELEMENT;
    }

    my $retval = undef;
    my ($type, $value) = getTypeAndValue();
    {
      no warnings q{uninitialized}; # allow undef values
      $log->debug(qq{$name;$type;$value}) if $log->is_debug();
      $retval = qq{$name;$type;$value};
    }
    push @retvals, $retval;
  }

  # Now expect END_OF_OBJECT tag to be next
  if (getraw(3) eq $END_OF_OBJECT) {
    return join(q{,}, @retvals);
  }

  $log->error(q{Did not find expected END_OF_OBJECT! at end of array!}
+) if $log->is_error();
  return;
}

sub getraw {
  my $len = shift;
  $log->logdie("missing length argument") unless $len;
  my $buffer = undef;
  my $num = read($FH, $buffer, $len);
  return $buffer;
}

# read given number of bytes, default 1;
sub getBytes {
  my $len = shift || 1;
  my $buffer = undef;
  my $num = read($FH, $buffer, $len);
  return unpack("c*", $buffer);
}

# Read string: first 2 bytes length, then string itself.  operates on
# global filehandle FH.  Read length first unless length is given,
# otherwise read the given number of bytes.
sub getString {
  my $len = shift;
  my $buffer = undef;
  my $num = undef;

  # read length from filehandle unless set
  $len = join(q{}, getBytes(2)) unless ($len);

  # return undef if length is zero
  return unless $len;

  $log->debug(qq{len:$len}) if $log->is_debug();
  $num = read($FH, $buffer, $len);
  $log->debug(qq{buffer:$buffer}) if $log->is_debug();
  return $buffer;
}

# read integer number, default 2 bytes
sub getint {
  my $len = shift || $LENGTH_OF_INTEGER;
  my $buffer = undef;
  my $num = read($FH, $buffer, $len);
  return unpack 'c*', reverse $buffer;
}

# read long integer number, default 4 bytes
sub getlong {
  my $len = shift || $LENGTH_OF_LONG;
  my $buffer = undef;
  my $num = read($FH, $buffer, $len);
  return unpack 'c*', reverse $buffer;
}

# read floating point number: default 8 bytes
sub getFloat {
  my $len = shift || $LENGTH_OF_FLOAT;
  my $buffer = undef;
  my $num = read($FH, $buffer, $len);
  return unpack 'd*', reverse $buffer;
}

# Read file header - 16 bytes in total. Return name if file starts
# with sol header, otherwise undef.  Failure means the 'TCSO' tag is
# missing.
sub readHeader {

  # skip first 6 bytes
  getString(6);

  # next 4 bytes should contain 'TSCO' tag
  if (getString(4) ne q{TCSO}) {
    $log->error("missing TCSO - not a sol file") if $log->is_error();
    return; # failure
  }

  # Skip next 7 bytes
  getString(7);

  # Read next byte (length of name) + the name
  my $name = getString(getint(1));

  $log->debug("name:$name") if $log->is_debug();

  # Skip next 4 bytes
  getString(4);

  return $name; # ok
}

# read an element, return "name;datatype;value"
sub readElement {
  my $retval = undef;

  # Read element length and name
  my $name = getString(getint(2));

  # Read data type and value
  my ($type, $value) = getTypeAndValue();
  {
    no warnings q{uninitialized}; # allow undef values
    $log->info(qq{$name;$type;$value}) if $log->is_info();
    $retval = qq{$name;$type;$value};
  }

  # Read trailer (single byte)
  my $trailer = getBytes(1);
  if ($trailer != 0) {
    $log->warn(qq{Expected 00 trailer, got '$trailer'}) if $log->is_wa
+rn();
  }

  return $retval;
}

# ------ parse file ------
sub parse {
  my $file = shift;

  $log->logdie( q{Missing argument file.}) if (!$file);
  $log->logdie(qq{No such file '$file'})  if (! -f $file);

  $log->debug("start") if $log->is_debug();

  open($FH,"< $file") || $log->logdie("Error opening file $file");
  $log->debug(qq{file:$file}) if $log->is_debug();
  binmode($FH);

  my @retvals = ();

  # Read header
  my $name = readHeader() or $log->logdie("Invalid sol header");
  push @retvals, $name;

  # Read data elements
  while (eof($FH) != 1) {
    push @retvals, readElement();
  }

  close($FH) or $log->logdie(q{failed to close filehandle!});

  return @retvals;
}

1;

__END__

=pod

=head1 NAME

  Sol::Parser - a .sol file reader

=head1 SYNOPSIS

  use Sol::Parser;
  my @content = Sol::Parser::parse("settings.sol");
  print join("\n", @content);

=head1 DESCRIPTION

Local Shared Object (LSO), sometimes known as flash cookies, is a
cookie-like data entity used by Adobe Flash Player.  LSOs are stored
as files on the local file system with the I<.sol> extension.  This
module reads a Local Shared Object file and return content as a list.

=head1 SOL DATA FORMAT

The SOL files use a binary encoding.  It consists of a header and any
number of elements.  Both header and the elements have variable length
+s.

=head2 Header

The header has the following structure:

=over

=item * 6 bytes (discarded)

=item * 4 bytes that should contain the string 'TSCO'

=item * 7 bytes (discarded)

=item * 1 byte that signifies the length of name (X bytes)

=item * X bytes name

=item * 4 bytes (discarded)

=back

=head2 Element

Each element has the following structure:

=over

=item * 2 bytes length of element name (Y bytes)

=item * Y bytes element name

=item * 1 byte data type

=item * Z bytes data (depending on the data type)

=item * 1 byte trailer

=back


=head1 TODO

=head2 Support I<XML> output

=head2 Add support for datatypes I<date> and I<doublestring>.

=head1 SEE ALSO

=head2 Local Shared Object

http://en.wikipedia.org/wiki/Local_Shared_Object

=head2 Flash coders Wiki doc on .Sol File Format

http://sourceforge.net/docman/?group_id=131628

=head2 A Python sol file converter

http://osflash.org/s2x

=head1 AUTHOR

  andreas1234567 on perlmonks.org

=cut
Update Tue Dec 18 14:18:29 CET 2007: I acknowledge Anonymous Monk's comment on Readonly. It will be replaced by use constant once released on CPAN.

Update Wed Dec 19 16:50:20 CET 2007 Released on CPAN as Sol::Parser.

Update Thu Dec 20 09:57:22 CET 2007 Released on CPAN as Parse::Flash::Cookie.

Replies are listed 'Best First'.
Re: Sol::Parser - a sol file reader
by Anonymous Monk on Dec 18, 2007 at 09:57 UTC