sub guess_perl_type {
    # Map a scalar (or a reference to one) to the name of its underlying
    # B:: class ("IV", "PV", ...).  For magical scalars (PVMG) we fall
    # back on the flag string to decide what the value really is.
    my ($self, $node) = @_;

    # B::svref_2object wants a reference; wrap plain scalars ourselves.
    my $sv    = B::svref_2object( ref($node) ? $node : \$node );
    my $class = B::class($sv);

    # Anything other than PVMG already names its type usefully.
    return $class if $class ne "PVMG";

    my $flags = $sv->flagspv;
    return "PV" if $flags =~ /POK/;    # string value is valid

    die "what type is $flags?";
}
#!/usr/bin/perl
package XML::SAX::ExpatNB;
use base qw/XML::SAX::Expat/;
use strict;
use warnings;
use XML::Parser ();
use Carp qw/croak/;
sub _really_create_parser {
    # Build the underlying XML::Parser-based parser at most once, caching
    # it so repeated parse_start calls reuse the same object.
    my ($self, $options) = @_;
    $self->{_xml_parser_obj} ||= $self->SUPER::_create_parser($options);
}
sub _create_parser {
    # Overridden hook: hand the superclass our cached ExpatNB object
    # instead of letting it construct a fresh parser of its own.
    my ($self) = @_;
    $self->_expat_obj;
}
sub _expat_obj {
    # Combined getter/setter for the XML::Parser::ExpatNB handle that
    # parse_start stores and parse_more/parse_done drive.
    my $self = shift;
    if (@_) {
        $self->{_expat_nb_obj} = shift;
    }
    return $self->{_expat_nb_obj};
}
sub _parser_obj {
    # Combined getter/setter for the cached XML::Parser instance created
    # by _really_create_parser.
    my $self = shift;
    if (@_) {
        $self->{_xml_parser_obj} = shift;
    }
    return $self->{_xml_parser_obj};
}
sub parse {
    # SAX entry point.  Delegate to the parent parser when we are nested;
    # otherwise we can only consume in-memory strings, since the whole
    # point of this class is that the caller drives the input loop.
    my $self = shift;
    my $opts = $self->get_options(@_);

    return $self->{Parent}->parse($opts) if $self->{Parent};

    my $string = $opts->{Source}{String};
    return $self->_parse_string($string) if defined $string;

    croak "The only thing I know how to parse is a string. You have to fetch me the data yourself.";
}
sub parse_more {
    # Alias matching the XML::Parser::ExpatNB incremental interface.
    my $self = shift;
    $self->parse_string(@_);
}
sub _parse_string {
    # Feed one chunk of XML to expat, implicitly beginning the parse on
    # the first chunk.
    my ($self, $xml) = @_;
    $self->parse_start unless $self->{_parsing};
    $self->_expat_obj->parse_more($xml);
}
sub parse_start {
    # Begin an incremental parse: create the real parser on demand and
    # stash the ExpatNB object that parse_more/parse_done will use.
    my ($self, $options) = @_;

    croak "Can't parse_start - Already started" if $self->{_parsing};
    $self->{_parsing} = 1;

    $self->_really_create_parser($options);
    $self->_expat_obj( $self->_parser_obj->parse_start );
}
sub parse_done {
    # Finish an incremental parse, letting expat run its end-of-document
    # checks and callbacks.
    my $self = shift;

    # Fixed typo in the error message: "Havn't" -> "Haven't".
    croak "Can't parse_done - Haven't started parsing"
        unless $self->{_parsing};

    undef $self->{_parsing};
    $self->_expat_obj->parse_done;
}
__PACKAGE__
__END__
=pod
=head1 NAME
XML::SAX::ExpatNB - XML::SAX::Expat subclass for nonblocking parsing.
=head1 SYNOPSIS
use XML::SAX::ExpatNB;
# don't do this, use XML::SAX::ParserFactory
my $p = XML::SAX::ExpatNB->new( Handler => MyHandler->new );
$p->parse_start;
while (<DATA>){
$p->parse_more($_); # or $p->parse_string($_);
}
$p->parse_done;
=head1 DESCRIPTION
Most XML parsers give a callback interface within an encapsulated loop
+. That
is, you call
$p->parse_whatever($whatever);
And eventually, when C<$whatever> is depleted by the parser, C<< $p->p
+arse >>
will return.
Sometimes you don't want the parser to control the loop for you. For e
+xample,
if you need to retrieve your XML in chunks in a funny way, you might n
+eed to do
something like
my $doc = '';
while (defined(my $buffer = get_more_xml())) {
$doc .= $buffer;
}
$p->parse_string($doc);
which is not very convenient, or efficient. You could use L<perltie> t
+o tie a
filehandle which does this for you, but that only works some of the ti
+me (for
example, say you have two inputs coming in simultaneously).
L<XML::Parser::ExpatNB|XML::Parser::Expat/"XML::Parser::ExpatNB Method
+s">
solves this by providing three methods:
=over 4
=item parse_start
=item parse_more
=item parse_done
=back
This interface lets you move the loop to outside the parser, retaining
+ control.
The callbacks are executed in the same manner, just that now, when there is
nothing left to parse, instead of taking more data from a source on its own,
the parser returns control to you.
$p->parse_start; # you can omit this - parse_start will
# be called automatically as needed
while(defined(my $buffer = get_more_xml())) {
$p->parse_more($buffer);
}
$p->parse_done;
This module is a subclass of L<XML::SAX::Expat> which is to
L<XML::Parser::ExpatXS> as L<XML::SAX::Expat> is to L<XML::Parser> its
+elf.
=cut
Vim logarithmic line movement
- Log2Backtrack will, given no state, jump to the center of the current line, or backtrack up the search.
- Log2Left will eliminate the right half of the search space, moving the cursor between what the cursor is now and the left marker
- Log2Right will do the same, just to the other direction
Example
Initially, Log2Backtrack() will put the cursor here:
This is a sample line of text which we will move around
^
Log2Right() will then put the cursor here
This is a sample line of text which we will move around
^
Log2Left() will afterwards put the cursor here
This is a sample line of text which we will move around
^
Log2Backtrack() at this point will return the cursor to here
This is a sample line of text which we will move around
^
And now, Log2Right() will move the cursor to
This is a sample line of text which we will move around
^
The "state" is invalidated if the cursor/linenumber do not correspond with what they were in the last call. Supposedly this is a bug - if you move with the log2 commands, then do something else, end up with the cursor in the same place, and do the log2 commands again, the state will not be reset as it sort of should. The only work around I see is to implement a reasonable timeout so that confusion is minimized.
I use these mappings:
map <F5> :call Log2Left()<CR>
map <F6> :call Log2Backtrack()<CR>
map <F7> :call Log2Right()<CR>
Here is the code:
" Reset the binary-search state unless the cursor is exactly where the
" previous Log2* call left it; moving by any other means invalidates it.
function Log2InitState()
    if !exists("b:log_2_cursor") || b:log_2_cursor != col(".") || b:log_2_line != line(".")
        let b:log_2_cursor     = col(".")
        let b:log_2_line       = line(".")
        let b:log_2_left       = 0
        let b:log_2_right      = strlen(getline(b:log_2_line))
        let b:log_2_call_stack = ""
    endif
endfunction
" Move the cursor to the midpoint of [left, right].  The a:i offset (0 or
" 1) lets Left and Right land on opposite sides of an odd-sized interval.
function Log2MoveIt(i)
    let b:log_2_cursor = a:i + b:log_2_left + ((b:log_2_right - b:log_2_left) / 2)
    call cursor(b:log_2_line, b:log_2_cursor)
endfunction
" Record one move ('l' or 'r') so Log2Backtrack can undo it later.
function Log2StackPush(val)
    let b:log_2_call_stack = b:log_2_call_stack . a:val
endfunction
" Remove and return the most recent move; yields '' when the stack is empty.
function Log2StackPop()
    let last = strpart(b:log_2_call_stack, strlen(b:log_2_call_stack) - 1, 1)
    let b:log_2_call_stack = substitute(b:log_2_call_stack, ".$", "", "")
    return last
endfunction
" Discard the right half of the search interval and jump to the midpoint
" of what remains.  A no-op once the interval has shrunk to one column.
function Log2Left()
    call Log2InitState()
    if b:log_2_right - b:log_2_left > 1
        let b:log_2_right = b:log_2_cursor
        call Log2StackPush("l")
        call Log2MoveIt(0)
    endif
endfunction
" Discard the left half of the search interval and jump to the midpoint
" of what remains.  A no-op once the interval has shrunk to one column.
function Log2Right()
    call Log2InitState()
    if b:log_2_right - b:log_2_left > 1
        let b:log_2_left = b:log_2_cursor
        call Log2StackPush("r")
        call Log2MoveIt(1)
    endif
endfunction
" Undo the most recent Left/Right narrowing (approximately, by doubling
" the interval back out), or reset to the whole line when no move is
" recorded.  Always re-centers the cursor afterwards.
function Log2Backtrack()
    call Log2InitState()
    let popped = Log2StackPop()
    if popped == "r"
        " Undo a Right: grow the interval back toward the left margin.
        let b:log_2_left = b:log_2_left - (b:log_2_right - b:log_2_left)
        if b:log_2_left < 0
            let b:log_2_left = 0
        endif
    elseif popped == "l"
        " Undo a Left: grow the interval back toward the end of the line.
        let b:log_2_right = b:log_2_right + (b:log_2_right - b:log_2_left)
        " BUGFIX: clamp the *right* edge to the line length.  The original
        " tested b:log_2_left here and assigned the length into
        " b:log_2_line, corrupting both the interval and the line number.
        if b:log_2_right > strlen(getline(b:log_2_line))
            let b:log_2_right = strlen(getline(b:log_2_line))
        endif
    else
        " Empty stack: reset to the full line.
        let b:log_2_left  = 0
        let b:log_2_right = strlen(getline(b:log_2_line))
    endif
    call Log2MoveIt(0)
endfunction
Hyper return via return handles, from here on 'bookmarks'.
bookmarks are runtime equivalents of labels which exist in the dynamic scope. They are an object that knows of a certain point in the optree, and a certain level in the stack.
the method ->jump, or the function return_to($obj) take you to that position, returning through the call stack upwards. The assignment of the bookmark is redone, and the LIST passed to the method/function are appended to the return from the bookmark constructor.
And now, for something completely different...
#!/usr/bin/perl
{
    package MySchema;
    use base qw/Class::DBI::Schema::SQL::Translator/;

    # BUGFIX: the original snippet was missing the closing brace of dbh(),
    # which left this bare block (and the package scope) unbalanced.
    sub dbh { DBI->connect_cached( ... ) } # mine comes from a config module
}
{
package MyTable;
use base qw/Class::DBI::Schema::SQL::Translator::Table/;
# Every table class derived from MyTable registers itself with the
# MySchema singleton named here.
sub schema_class { "MySchema" }
}
{ # this should really be a file
package Foo::Schema;
use base qw/MyTable/;
use Class::DBI::Schema::SQL::Translator; # for CAPS_SUBS
# Declare the table's fields at construction time using the uppercase
# SQL-flavoured helper subs (ID, STRING, UNIQUE, INTEGER) imported above.
sub init {
my $self = shift->SUPER::init(@_);
$self->add_fields(
# these guys create SQL::Translator::Schema::Field
# objects, or transform them. Prototypes make them
# sort of like SQL
ID,
STRING name => 30,
UNIQUE INTEGER "blah",
);
return $self; # SQL::Translator objects are Class::Base based
}
}
# elsewhere
use Foo::Schema; # import creates an instance
# ... or
# my $table = Foo::Schema->instance; # because ->isa("Class::Singleton
+")
# creating the schema singleton will cause it to register with the glo
+bal schema
# and stub the CDBI class with an AUTOLOAD.. more on this several line
+s down
# the fun part:
# just use CDBI
Foo->create({ name => "bob" });
# some explanation:
# AUTOLOAD causes the schema object to create the CDBI class,
# by calling $schema->make_cdbi_class.
# make_cdbi_class will do some standard CDBI stuff.
# it will also make a db_Main if Foo::Schema->schema_class (the global
+ schema)
# ->can("dbh")
# Foo::Schema->cdbi_class == Foo, the ::Schema suffix is removed.
# if Foo::Schema->cdbi_class->db_Main->tables doesn't contain
# Foo::Schema->cdbi_class->moniker, it uses SQL::Translator to create
+the table
# in Foo->db_Main
# the Class::DBI::Schema::SQL::Translator::Table init checks for
# ->can("SUB"), so you can say
package Foo::Schema;
sub FIELDS {
ID,
STRING blah => $length,
};
# or even
use constant FIELDS => [ ID, STRING blah => $length ];
# but i have to do this for relationships too.
FIXME reminder system
I write code with many # FIXME statements in it. Recently I wrote a reminder system to help me manage them. An example follows.
In my source dir I have a file, lib/MyClass/Exception.pm, which currently uses a class, MyClass::Message, as its base class, although it should use it as an interface (however, interface is somewhat broken for the meanwhile).
It looks like this:
#!/usr/bin/perl
package MyClass::Exception;
use base qw/Error/;
use base qw/MyClass::Message/; # FIXME use interface;
...
When I wrote it, I didn't care much, but knew I would like it changed in the future for perfectionism's sake. I moved on, though, because real work is more important than fixing interface.
As I work on something else, I rerun the test suite (output trimmed). In it, the exception class is tested too, and as it is compiled I see:
• [syeeda:~/src] nothingmuch % prove t
t/exception-api....# FIXME: use interface; at /Users/nothingmuch/src/l
+ib/MyClass/Exception.pm line 4
t/exception-api....ok
All tests successful.
Files=1, Tests=10, 5 wallclock secs ( 1.71 cusr + 0.24 csys = 1.95
+CPU)
Which is rather useful.
To further demonstrate usefulness, consider:
• [syeeda:~/src] nothingmuch % touch /Users/nothingmuch/src/lib/MyClas
+s/Exception.pm
• [syeeda:~/src] nothingmuch % prove t
t/exception-api....ok
+
All tests successful.
Files=1, Tests=10, 5 wallclock secs ( 1.71 cusr + 0.24 csys = 1.95
+CPU)
Basically, what I have is something that tells me when I need to fix a file, because it looks like i've forgotten it. Why does it do that?
because I have a rules file that looks like this:
#!/usr/bin/perl
use Date::Manip;
use Time::Piece;
use File::Find::Rule;
sub parse_date { Time::Piece->strptime(ParseDate(shift), "%Y%m%d%T") }
(
sub { # don't remind to fix code which I'm not responsible for
my $self = shift;
DROP if $self->{file} =~ m{
/usr/.*lib # not mine, 5.9.1 tree
| /.*Library/Perl # not mine, OSX stock
| support # [patched] support code, test
+ for manually
}x;
},
sub { # don't remind to fix files which i've been working on recen
+tly
my $self = shift;
DROP unless File::Find::Rule->
file->
mtime( "<=" . parse_date("yesterday")->epoch )->
in( $self->{file} );
},
);
And to get the effect of implicit reminding, my environment contains:
PERL5OPT=-MDevel::FIXME::Rules::PerlFile
This is a FIXME subclass which evaluates ~/.fixme/rules.pl, which is expected to return an array reference of code references to the require.
Here is the implementation... Future versions might use Filter::Simple's regexes, as well as some simple-minded matching to look only in comments. Currently it will match strings and docs too, but I think it's pretty much a non-issue.
Enjoy!
#!/usr/bin/perl
package Devel::FIXME;
use 5.008_000; # needs FH interface to var
use strict;
use warnings;
use Exporter;
use Scalar::Util qw/reftype/;
use List::Util qw/first/;
use Carp qw/carp croak/;
our @EXPORT = qw/FIXME/;
our @EXPORT_OK = qw/SHOUT DROP CONT/;
our %EXPORT_TAGS = ( "constants" => \@EXPORT_OK );
our $VERSION = 0.01;
# some constants for rules
sub CONT () { 0 };
sub SHOUT () { 1 };
sub DROP () { 2 };
my %lock; # to prevent recursion
our %rets; # return value cache
our $cur; # the current file, used in an eval
our $err; # the current error, for rethrowal
our $inited; # whether the code ref was installed in @INC, and all
{ my $anon = ''; open my $fh, "<", \$anon or die $!; close $fh; } # ot
+herwise perlio require stuff breaks
sub init {
    # One-shot setup: scan everything loaded so far for FIXME comments,
    # then hook @INC so files loaded later get scanned as well.
    my $class = shift;
    unless ($inited) {
        # Scan $0 and all of %INC -- but never this file itself, so we
        # don't report the FIXMEs that appear in our own documentation.
        $class->readfile($_) for ($0, sort grep { $_ ne __FILE__ } values %INC);
        $class->install_inc;
    }
    $inited = 1;
}
our $carprec = 0;
sub install_inc {
    # Prepend a code ref to @INC so every subsequent require/use passes
    # through us: we really load the file, scan it for FIXME comments,
    # and hand perl a tiny in-memory stub that replays the real file's
    # return value.  (YUCK -- but tying %INC didn't work, and source
    # filters apply per caller; an XS source filter would be even uglier.)
    my $pkg = shift;

    unshift @INC, sub {
        my ($hook, $file) = @_;

        # If we're already loading $file we are inside the eval below;
        # decline, so the next @INC entry handles the request.
        return undef if $lock{$file};
        local $lock{$file} = 1;

        unless (ref $INC[0] and $INC[0] == $hook) {
            # Some files may now escape filtering.  Warn -- but keep carp
            # from recursing through us while it loads its own innards.
            local @INC = grep { !ref or $_ != $hook } @INC;
            carp "FIXME's magic sub is no longer first in \@INC";
        }

        # The stub we return: dereference (and delete) the cached return
        # value of the real file, so require sees the expected result.
        my $buffer = "\${ delete \$Devel::FIXME::rets{q{$file}} };";

        # Really load the file.  require evaluates the file's return
        # value in scalar context.
        local $cur = $file;
        my $ret = eval 'require $Devel::FIXME::cur';

        # Strip the synthetic "Compilation failed in require" frame and,
        # on error, rethrow through the stub (this keeps base.pm quiet).
        ($err = "$@\n") =~ s/\nCompilation failed in require at \(eval \d+\)(?:\[.*?\])? line 1\.\n//s;
        $buffer = 'die $Devel::FIXME::err' if $@;

        # Cache the real return value for the stub to hand back.
        $rets{$file} = \$ret;

        # Scan the freshly loaded source for FIXME comments.
        $pkg->readfile($INC{$file}) if ($INC{$file});

        open my $fh, "<", \$buffer;
        $fh; # perl will "require" this in-memory stub next.
    };
}
sub readfile {
    # Scan one source file line by line, emitting a FIXME object for
    # every "# FIXME ..." comment found.  Note this also matches FIXME
    # inside strings and POD; considered a non-issue for now.
    my ($pkg, $file) = @_;
    return unless -f $file;

    open my $src, "<", $file or die "couldn't open $file: $!";
    local $_;
    while (<$src>) {
        if (/#\s*FIXME\s+(.*)$/) {
            $pkg->FIXME(
                text => "$1",
                line => $.,
                file => $file,
            );
        }
    } continue { last if eof $src }; # is this a platform bug on OSX?
    close $src;
}
sub eval {
    # Run each rule against this FIXME object.  SHOUT prints and stops,
    # DROP silences and stops, anything else (CONT) moves on to the next
    # rule.  When no rule decides, we shout by default.
    my $self = shift;
    foreach my $rule ($self->rules) {
        my $verdict = &$rule($self);
        return $self->shout if $verdict == SHOUT;
        return undef        if $verdict == DROP;
    }
    return $self->shout;
}
sub rules { }; # no rules by default, so every FIXME is shouted; subclasses override this
sub shout {
    # Emit the FIXME as a warning in a grep-friendly one-line format.
    # The trailing \n keeps warn from appending its own "at ... line".
    my $self = shift;
    warn "# FIXME: $self->{text} at $self->{file} line $self->{line}.\n";
}
sub new {
    # Construct one object per FIXME statement.  Accepts a single hashref
    # of attributes, a lone text string, or a list of key/value pairs.
    my $class = shift;

    my %args;
    if (@_ == 1) {
        if (ref $_[0] and reftype($_[0]) eq 'HASH') {
            %args = %{ $_[0] };        # hashref: take it as our args
        } else {
            %args = ( text => $_[0] ); # single scalar: it's the text
        }
    } elsif (@_ % 2 == 0) {
        %args = @_;                    # even list: key/value pairs
    } else {
        croak "Invalid arguments";
    }

    my $self = bless \%args, $class;

    # Default the origin info to wherever our caller's caller sits.
    $self->{package} ||= (caller(1))[0];
    $self->{file}    ||= (caller(1))[1];
    $self->{line}    ||= (caller(1))[2];
    $self->{script}  ||= $0;
    $self->{time}    ||= localtime;

    return $self;
}
sub import {
    # Dual-purpose: "use Devel::FIXME;" (or with recognized symbol names)
    # behaves like a normal Exporter import, while "use Devel::FIXME
    # 'some message'" with any other single string emits it as a FIXME.
    my $pkg = $_[0];
    $pkg->init;

    my $looks_like_export = (
        @_ == 1
            or @_ > 2
            or (@_ == 2 and first { $_[1] eq $_ or $_[1] eq "&$_" }
                    @EXPORT_OK, map { ":$_" } keys %EXPORT_TAGS)
    );

    if ($looks_like_export) {
        shift;
        # Export into our caller's caller, not into this frame.
        local $Exporter::ExportLevel = $Exporter::ExportLevel + 1;
        $pkg->Exporter::import(@_);
    } else {
        goto \&FIXME; # treat the argument as a FIXME message
    }
}
sub FIXME {
    # Usable as a plain function, a class method, or an instance method:
    # strip the invocant when there is one, then build and evaluate.
    my $pkg = __PACKAGE__;
    if (UNIVERSAL::can($_[0], "isa") and $_[0]->isa(__PACKAGE__)) {
        $pkg = shift;
    }
    $pkg->new(@_)->eval;
}
*msg = \&FIXME; # booya.
__PACKAGE__
__END__
=pod
=head1 NAME
FIXME - Semi intelligent, pending issue reminder system.
=head1 SYNOPSIS
this($code)->isa("broken"); # FIXME what shall we fix?
=head1 DESCRIPTION
Usually we're too busy to fix things like circular refs, edge cases an
+d things
when we're spewing code into the editor. A common thing to do is to ma
+ke a
comment saying
... # FIXME I hope someone finds this comment
somewhere in your code, and then search through your sources for occurrences of
I<FIXME> every now and then.
This works, pretty much, until your code base grows considerably, and
+you have
too many FIXMEs to prioritise properly.
The solution for me was to create this package, which gives the author
+ an
"intelligent" way of being reminded.
=head1 DECLARATION INTERFACE
There are several ways to get your code fixed by you in some indetermi
+nate time
in the future.
The first is a sort-of source filter based compile time fix, which doe
+s not
affect shipped code.
$code; # FIXME broken
That's it. The most reasonable way to get it to work is to set the env
+ironment
variable I<PERL5OPT>, so that it contains C<-MFIXME>.
The second interface is a compile time, somewhat more explicit way of emitting
messages.
use Devel::FIXME "broken";
This can be repeated for additional messages as needed. This is useful
+ if you
want your FIXMEs to break as you test it with a perl tree that doesn't
+ have
FIXME.pm in it.
The third, and probably most problematic, is a runtime, explicit way of emitting
messages:
$code; FIXME("broken");
This relies on FIXME to have been imported into the current namespace,
+ which is
probably not always the case. Provided you know FIXME is loaded I<some
+where> in
the running perl interpreter, you can use a fully qualified version:
$code; Devel::FIXME::FIXME("broken");
or if you feel that's not pretty enough:
$code; Devel::FIXME->msg("broken");
# or
$code; Devel::FIXME::msg("broken");
But do use the former two methods instead.
=head1 OUTPUT FILTERING
=head2 Rationale
There are some problems with simply grepping for occurances of I<FIXME
+>:
=over 4
=item *
It's messy - you get a bajillion lines, if your source tree is big eno
+ugh.
=item *
You need context, which can be provided for, but is a bit of work (and
+ adds to
the clutter).
=item *
You (well I<me> anyway) forget to do it. And no, cron is not perfect f
+or this.
=back
The solution to the first two problems is to make the reporting smart,
+ so that
it decides which FIXMEs are printed and which arent.
The solution to the last problem is to have it happen automatically wh
+enever
the source code in question is used.
=head2 Principal
The way FIXMEs are filtered is similar to how a firewall filters packe
+ts.
Each FIXME statement is considered as it is found, by iterating throug
+h some
rules, which ultimately decide whether to print the statement or not.
This may sound a bit overkill, but I think it's useful.
What it means is that you can get reminded of FIXMEs in source files t
+hat are
more than a week old, or when your release schedule reaches feature fr
+eeze, or
if your program is in the stable tree if your source management reposi
+tory, or
whatever.
=head2 Practice
Currently the FIXMEs are filtered by calling the class method C<rules>
+, and
evaluating the subroutine references that are returned, as methods on
+the fixme
object.
In the future, a witty little grammar will be written, to allow you to
+ do the
trivial things concisely, and use perl code for more complicated rules
+, in a
config file that is global to your settings.
=head1 BUGS
If I had a nickle for every bug you could find in this module, I would
+ have
zero or more nickles.
=head1 COPYRIGHT & LICENSE
Same as perl.
=head1 AUTHOR
Yuval Kogman <nothingmuch@woobling.org>
=head1 GETTING A HANDLE ON IO
=head2 Introduction
This tutorial will delve into the naughtier, uglier parts of POSIX cen
+tric I/O.
Herein are covered the nasty details of the various calls, and the lay
+ers they
belong to, the different modes of input and output, and the combinatio
+n of
these aspects into practical examples.
The tutorial will not attempt to explain the various ways to obtain th
+e actual
handles, except as necessary for specific examples. It's aim is to tut
+or on the
various styles of I/O, that is, to manipulate given handles, in more
specialized ways, in the hope that when the kind of interaction with a
+ handle a
user wants to attain is known, finding out how to get such a handle wi
+ll be
easy by using the reference (L<perlipc>, L<perlopentut>, L<perlfunc/so
+cket>,
L<perlfunc/pipe>, L<perlfunc/fcntl>, L<IPC::Open2>).
The main focus of the tutorial will be simplicity. Thereafter will com
+e
robustness, and then performance. What this means is that I will not
systematically append C<or die "$!"> to every line of example, because
+ I find
that distracting. I will also not resort to ugly constructs to gain a
+little
throughput. I think impure examples hinder my ability to convey my ide
+as
clearly.
Nuff said, on to the intro. We start with tiny baby steps, and then st
+art
striding forward.
=head2 What is a filehandle?
We'll start by covering the perl specific data type, which abstracts a
+ stream
of data. The filehandle.
If you already think you know what you're doing, skip onwards a bit. T
+his is
really basic stuff.
Perl's filehandles are points through which data is moved. You can ref
+er to by
name, or by storing them in a variable.
The abstraction focuses round a metaphor of a sort of port hole, or pi
+pe end,
which your software can ask the OS to take data from and move it elsew
+here, or
put data on for your software to read.
Data is moved via these orifices in chunks, coming out of or going int
+o a
normal variable, as a string.
For example, lets say we've opened a file:
open my $fh, "/some/file";
This stores a reference to a filehandle in the variable C<$fh>, which
+will
grant you access to the data inside C</some/file>.
Perl allows us to ask for data to come out of filehandles in useful wa
+ys. Lets
say we wanted a single line from C</some/file> to be stored in the var
+iable.
my $var = <$fh>;
But wait, how do we know which line will come out of C<$fh>? Well, the
+ answer
is "the next one". Filehandles are stream oriented. Data will arrive s
+erially,
and you can nibble at it, slowly progressing through the stream of dat
+a, till
it ends.
Specifically, handles having to do with files will have an implicit cu
+rsor,
working behind the scenes, marking the point in the file which the han
+dle is
currently at.
=head2 Plumbing your handles
To move data in and out of file handles you use system calls. We'll st
+art with
the two most basic calls there are, L<read(2)> and L<write(2)>, which
+are
available in perl as the builtin functions C<sysread> and C<syswrite>.
Their interfaces are pretty straightforward. Here is a subset of thei
+r
functionality:
sysread $fh, $variable_data_will_be_read_to, $how_much_data_to_rea
+d;
C<sysread> takes a filehandle as it's first argument, a variable as it
+'s
second, and a number as it's third, and read as many bytes as are spec
+ified in
the number, from the handle, into the variable.
syswrite $fh, $data_to_write;
C<syswrite> will take a filehandle as it's first argument, and a strin
+g as it's
second argument, and write the data from the string, to the filehandle
+.
We already know of a way data can be put on a filehandle for us, which
+ was
telling the OS what file we'd like it to come from. Writing is just as
flexible. The next section discusses ways of telling the OS not only w
+hat data
is moved around, but where it will go.
=head2 Directing data, a conceptual introduction
Now that we've a hopefully firm grasp on how data enters and exits you
+r
software through handles, lets discuss it's movement, specifically, wh
+ere it
goes.
The most common use for filehandles is for storing and retrieving data
+ in
files.
We've already seen opening for reading. We can also write to a file:
open my $fh, ">", "/some/file";
The C<< > >> argument will tell C<open> that we want to write to the f
+ile (and
also to erase it's contents first). When C<$fh> is opened for writing,
+ we
simply use C<syswrite> or one of it's deriviatives on it.
But handles are not limited to just files. They can also be sockets, a
+llowing
the transfer of data between two unrelated processes, on possibly two
+different
machines. A web server, for example, reads and writes on handles, rece
+iving and
sending data to browsers.
Handles can be used as pipes to other processes, like to child process
+es using
L<pipe> or processes in a shell pipeline. The latter case is interesti
+ng,
because it is done implicitly:
cat file | tr a-z A-Z > file.uppercase
That command will ask L<cat(1)> to read the file C<file>, and then pri
+nt it to
it's I<standard output>. The standard output is a handle that you woul
+d
normally output data to. What "normally" means in this context will be
explained soon. Then L<tr(1)> reads data from it's I<standard input>,
+converts
the data, and writes it to I<it's> standard output. It does this a chu
+nk at a
time. The shell redirect is perhaps the most interesting part: instead
+ of
L<tr(1)>'s STDOUT being connected to the terminal, where the user can
+read the
data, the shell connected L<tr(1)>'s STDOUT to a handle of it's own, w
+hich is
opened to C<file.uppercase>.
I hope that the example fulfilled its purpose in demonstrating the
flexibility of the concept of piping data around through file handles.
=head2 The going gets tough
Now that we've covered the conceptual basics, lets look in greater det
+ail at
the most simple type of handle there is - a single purposed, non seeka
+ble,
blocking handle.
What single purposed means is that it can either read, or write. Not b
+oth.
What seekable means, is that you can use C<seek> to change the cursor
+position
for the file the handle abstracts. Not all handles abstract files, and
+ thus not
all handles have cursors. The ones that don't work more simply. A bloc
+king
handle refers to the type of semantics the system calls on the handle
+will work
in.
Non-seekable handles are implemented in terms of a buffer. The operati
+ng system
associates some scratch space for it. As data comes into the buffer fr
+om
somewhere (it could be your software writing to it, or somebody else i
+f you're
on the reading side), it is accumulated in that buffer. When data is r
+ead from
the handle, it is taken from the buffer.
What happens when there is not enough space in the buffer to write any
+more? Or
not enough data in the buffer to be read? This is where the blocking s
+emantics
of this kind of handle comes in. I'm oversimplifying, but basically, i
+f the
writing side wants to write a chunk of data that is too big for the sp
+ace in
the buffer, the operating system simply makes it wait with the write t
+ill the
reading side asks for some data to come out. As data exits the buffer,
+ more
space is cleared out, and the writing can continue. Eventually all the
+ data
will be written to the buffer, and the write system call that the writ
+ing side
executed will return. The same goes for reading: the read system call
+will
simply wait until the data that was asked for has been made available.
The state in which an operating system puts a process that is waiting
+for an IO
call to complete is referred to as 'blocked'. When a process is blocke
+d it
leaves the hardware resources free for other processes to use.
Blocking IO has an interesting property, in that it balances resource
allocation in a pipeline.
Lets say for example, that you ran this line of shell:
cat file.gz | gzip -d | tr a-z A-Z
L<cat(1)> is doing very little work. It's a simple loop. It reads from
+ the
file, and writes to STDOUT. The data that L<gzip(1)> is getting, on th
+e other
hand, is processed more extensively. L<gzip(1)> performs some complex
calculation on the data that enters it, and outputs derived data after
+ this
calculation. Then, finally, L<tr(1)> performs simple actions, that whi
+le more
complex than L<cat(1)>, they are dwarfed by L<gzip(1)>.
So what happens is that L<cat(1)> will read some data, and then write
+some
data, and then read some more data, and write some more data, until th
+e buffer
is full, and it's write will block. All this time L<gzip(1)>'s and L<t
+r(1)>'s
read calls were blocking. Eventually L<gzip(1)>'s read will return, a
+llowing
it to do it's job, and finally emit data to L<tr(1)>. It will turn out
+ that
most of the time L<gzip(1)> will use up CPU time, while L<cat(1)> will
+ spend
most of it's time blocking in write calls, and L<tr(1)> will spend mos
+t of it's
time blocking in read calls, but will need some time for calculation t
+oo,
otherwise L<gzip(1)>'s writes will block.
Plan (not really in order):
blocking, nonseekable handles, and their conventions:
fatal errors, SIGPIPE, etc. promote fault tolerant behavior by default
UNIX pipelining mantra
explain when blocking is not good, and continue with a single purpose,
+ non
seekable handle as used in a select loop to avoid it. mention epoll/kq
+ueue, and
perl interfaces for thereof. Mention Event/POE as more powerful multip
+lexing solutions.
multiplexing, with a threading approach, as an alternative to select.
and a non blocking approach, including SIGIO, nb vs. select, reliabili
+ty, and
latency versus blocking & selected IO.
When not to use nonblocking
bufferring, stdio vs sycalls, different functions, perlio
Touch seekable handles briefly, and explain the semantics of blocking
+and so on
as far as file io is concerned, mention files, and discuss that not al
+l things
in the filesystem are files: devices (char and block), named pipes, un
+ix domain
sockets...
Sockets
Introduce non stream handles, and discuss the implementations of socke
+t IO, and
it's multilayered nature, the relationship between streams and datagra
+ms.
Implications of networking envs.
discuss IO on shared handles
discuss accept() on shared sockets in a preforked env
appendix: faux IO: open ref, perlio layers and ties
|