[Event "Botvinnik Memorial"]
[Site "Moscow"]
[Date "2001.12.05"]
[Round "4"]
[White "Kasparov, Garry"]
[Black "Kramnik, Vladimir"]
[Result "1/2-1/2"]
[ECO "C80"]
[WhiteElo "2839"]
[BlackElo "2808"]
[PlyCount "37"]
[EventDate "2001.12.01"]
1. e4 e5 2. Nf3 Nc6 3. Bb5 a6 $1 {first comment} 4. Ba4 Nf6 5. O-O
Nxe4 {second comment} 6. d4 ; comment starting with ";" up to EOL
b5 7. Bb3 d5 8. dxe5 Be6 9. Be3 {third comment} 9... Bc5 10. Qd3 O-O
11. Nc3 Nb4 (11... Bxe3 12. Qxe3 Nxc3 13. Qxc3 Qd7 14. Rad1 Nd8 $1
15. Nd4 c6 $14 (15... Nb7 16. Qc6 $1 $16)) 12. Qe2 Nxc3 13. bxc3 Bxe3
% escaped line - it will be discarded up to the EOL
14. Qxe3 Nc6 {wrong } comment} 15. a4 Na5 oh? 16. axb5 {yet another
comment} (16. Nd4 {nested comment}) 16... axb5 17. Nd4 (17. Qc5 c6 18.
Nd4 Ra6 19. f4 g6 20. Ra3 Qd7 21. Rfa1 Rfa8) 17... Qe8 18. f4 c5 19.
Nxe6 the end 1/2-1/2
####
$game =
{ 'Event' => 'Botvinnik Memorial',
'Site' => 'Moscow',
'Round' => '4',
'White' => 'Kasparov, Garry',
'Black' => 'Kramnik, Vladimir',
'Date' => '2001.12.05',
'Result' => '1/2-1/2',
'WhiteElo' => '2839',
'BlackElo' => '2808',
'ECO' => 'C80',
'PlyCount' => '37',
'EventDate' => '2001.12.01',
GameMoves => [ qw( e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O Nxe4 d4 b5 Bb3 d5
dxe5 Be6 Be3 Bc5 Qd3 O-O Nc3 Nb4 Qe2 Nxc3 bxc3 Bxe3 Qxe3 Nc6 a4
Na5 axb5 axb5 Nd4 Qe8 f4 c5 Nxe6) ],
'GameComments' => {
'3b' => ' $1 {first comment}',
'5b' => ' {second comment}',
'6w' => ' ; comment starting with ";" up to EOL ',
'9w' => ' {third comment}',
'11b' => ' (11... Bxe3 12. Qxe3 Nxc3 13. Qxc3 Qd7 14. Rad1 Nd8 $1
15. Nd4 c6 $14 (15... Nb7 16. Qc6 $1 $16))',
'14b' => ' {wrong }',
'16w' => ' {yet another comment} (16. Nd4 {nested comment})',
'17w' => ' (17. Qc5 c6 18. Nd4 Ra6 19. f4 g6 20. Ra3 Qd7 21.
Rfa1 Rfa8)'
},
'GameErrors' => {
'14b' => 'comment}',
'15b' => 'oh?',
'19w' => 'the end'
}
}
##
##
my %switchcolor = ( 'w' => 'b', 'b' => 'w');
my @gamechars = split //, $gametext;
for ($position =0; $position < scalar @gamechars; ) {
if ($gamechars[$position] =~ /([\(\[\{])/) {
$end_comment = _find_end_symbol(\@gamechars, $position, $1);
$comment = substr($gametext, $position,
$end_comment - $position +1);
$game{comments}->{$movecount.$color} .= " " . $comment;
$position = $end_comment + 1;
}
elsif ($gamechars[$position] =~ /([;%])/) {
$end_comment = _find_EOL(\@gamechars, $position);
# store comment as above
}
elsif (($gamechars[$position] =~ /[1-9]/) {
$num = _find_chunk (\@gamechars, $position, $REnumber);
$movecount++;
$position += length($num);
}
elsif ($gamechars[$position] =~ /([OKQRBNa-h])/) {
$move = _find_chunk(\@gamechars, $position, $REmove);
$color = $switchcolor{$color};
$position += length($move);
}
else {
$position++;
# store into errors unless /\n|\s/;
}
}
sub _find_chunck (\@array $pos $regexp ){
# returns a portion of @array starting from $pos
# matching $regexp
}
sub _find_EOL (\@array $pos){
# returns the nearest EOL from @array starting from $pos
#
}
sub _find_end_symbol (\@array $pos $symbol ){
# returns the position of the closing symbol
# "{" => "}", "(" =>")", "[" => "]"
# in \@array, starting at $pos
}
##
##
while (<>) {
chomp;
PARSER: {
m/ \G( \d+\b )/gcx && do { print "number: $1\n"; redo; };
m/ \G( \w+ )/gcx && do { print "word: $1\n"; redo; };
m/ \G( \s+ )/gcx && do { print "space: $1\n"; redo; };
m/ \G( [^\w\d]+ )/gcx && do { print "other: $1\n"; redo; };
}
}
##
##
while (m/$REmove|$REnumber|$REcomment|$REeolcomment/g) {
print $1;
}
##
##
package PGNParser; # temporary name
use FileHandle;
use Regexp::Common;
sub new {
my $class = shift;
my $filename = shift || return undef;
my $fh = new FileHandle "< $filename";
unless (defined $fh) {
return undef
}
my $self = bless {
GameMoves =>[], # game moves
GameComments =>{}, # comments with reference to the move
gamedescr => {}, # will contain the PGN tags
GameErrors => {}, # will contain the parsing errors
fh => \$fh # filehandle to the PGN file
}, $class;
return $self;
}
sub read_game{
# will read the game from a PGN file
# after this, the game text will be in $self->{gamedescr}{Game}
}
my $REresult = qr{(?:1\-0|0\-1|1\/2\-1\/2|\*)};
my $REmove = qr{[KQRBN]?[a-h]?[1-8]?x?[a-h][1-8](?:\=[QRBN])?};
my $REcastling = qr/O\-O(?:\-O)?/;
my $REcheck = qr/(?:(?:\#|\+(\+)?))?/;
my $REanymove = qr/(?:$REmove|$REcastling)$REcheck/;
my $RENAG = qr/\$\d+/;
my $REnumber = qr/\d+\.(?:\.\.)?/;
my $REescape = qr/^\%[^\n]*\n/;
my $REeolcomment= qr/;.*$/;
my $REcomment = $RE{balanced}{-parens=>'{}'};
my $RERAV = $RE{balanced}{-parens=>'()'};
my %switchcolor = ('w' => 'b', 'b' => 'w');
sub parse_game {
my $self = shift;
return undef unless $self->{gamedescr}{Game};
my $movecount = 0;
my $color = 'b';
$self->{gamedescr}{Game} =~ s/$REresult\s*\Z//o;
PARSER:
{
$self->{gamedescr}{Game} =~ m/\G($REnumber)\s*/mgc && do {
my $num=$1;
if (( $num =~ tr/\.//d) > 1) {
$color = 'w';
}
if ($movecount == 0) {
$movecount = $num;
}
elsif ($movecount == ($num -1)) {
$movecount++;
}
elsif ($movecount != $num) {
$self->{GameErrors}->{$movecount.$color}
.= " invalid move sequence ($num <=> $movecount)";
$movecount++;
}
redo
};
$self->{gamedescr}{Game} =~ m/\G($REanymove)\s*/mgc && do {
push @{$self->{GameMoves}}, $1;
$color = $switchcolor{$color};
redo
};
$self->{gamedescr}{Game} =~
m/\G($REcomment|$REeolcomment|$RERAV|$RENAG|$REescape)\s*/mgc
&& do
{
$self->{GameComments}->{$movecount.$color} .= " " . $1;
$self->{GameComments}->{$movecount.$color} =~ tr/\r//d;
$self->{GameComments}->{$movecount.$color} =~ s/\n/ /g;
redo
};
$self->{gamedescr}{Game} =~ m/\G(\S+\s*)/mgc && do {
$self->{GameErrors}->{$movecount.$color} .= " " . $1;
$self->{GameErrors}->{$movecount.$color} =~ tr/\r//d;
$self->{GameErrors}->{$movecount.$color} =~ s/\n/ /g;
redo
};
}
}