[Event "Botvinnik Memorial"] [Site "Moscow"] [Date "2001.12.05"] [Round "4"] [White "Kasparov, Garry"] [Black "Kramnik, Vladimir"] [Result "1/2-1/2"] [ECO "C80"] [WhiteElo "2839"] [BlackElo "2808"] [PlyCount "37"] [EventDate "2001.12.01"] 1. e4 e5 2. Nf3 Nc6 3. Bb5 a6 $1 {first comment} 4. Ba4 Nf6 5. O-O Nxe4 {second comment} 6. d4 ; comment starting with ";" up to EOL b5 7. Bb3 d5 8. dxe5 Be6 9. Be3 {third comment} 9... Bc5 10. Qd3 O-O 11. Nc3 Nb4 (11... Bxe3 12. Qxe3 Nxc3 13. Qxc3 Qd7 14. Rad1 Nd8 $1 15. Nd4 c6 $14 (15... Nb7 16. Qc6 $1 $16)) 12. Qe2 Nxc3 13. bxc3 Bxe3 % escaped line - it will be discarded up to the EOL 14. Qxe3 Nc6 {wrong } comment} 15. a4 Na5 oh? 16. axb5 {yet another comment} (16. Nd4 {nested comment}) 16... axb5 17. Nd4 (17. Qc5 c6 18. Nd4 Ra6 19. f4 g6 20. Ra3 Qd7 21. Rfa1 Rfa8) 17... Qe8 18. f4 c5 19. Nxe6 the end 1/2-1/2 #### $game = { 'Event' => 'Botvinnik Memorial', 'Site' => 'Moscow', 'Round' => '4', 'White' => 'Kasparov, Garry', 'Black' => 'Kramnik, Vladimir', 'Date' => '2001.12.05', 'Result' => '1/2-1/2', 'WhiteElo' => '2839', 'BlackElo' => '2808', 'ECO' => 'C80', 'PlyCount' => '37', 'EventDate' => '2001.12.01', GameMoves => [ qw( e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O Nxe4 d4 b5 Bb3 d5 dxe5 Be6 Be3 Bc5 Qd3 O-O Nc3 Nb4 Qe2 Nxc3 bxc3 Bxe3 Qxe3 Nc6 a4 Na5 axb5 axb5 Nd4 Qe8 f4 c5 Nxe6) ], 'GameComments' => { '3b' => ' $1 {first comment}', '5b' => ' {second comment}', '6w' => ' ; comment starting with ";" up to EOL ', '9w' => ' {third comment}', '11b' => ' (11... Bxe3 12. Qxe3 Nxc3 13. Qxc3 Qd7 14. Rad1 Nd8 $1 15. Nd4 c6 $14 (15... Nb7 16. Qc6 $1 $16))', '14b' => ' {wrong }', '16w' => ' {yet another comment} (16. Nd4 {nested comment})', '17w' => ' (17. Qc5 c6 18. Nd4 Ra6 19. f4 g6 20. Ra3 Qd7 21. Rfa1 Rfa8)' }, 'GameErrors' => { '14b' => 'comment}', '15b' => 'oh?', '19w' => 'the end' } } #### my %switchcolor = ( 'w' => 'b', 'b' => 'w'); my @gamechars = split //, $gametext; for ($position =0; $position < scalar @gamechars; ) { if ($gamechars[$position] =~ /([\(\[\{])/) { $end_comment = _find_end_symbol(\@gamechars, $position, $1); $comment = substr($gametext, $position, $end_comment - $position +1); $game{comments}->{$movecount.$color} .= " " . $comment; $position = $end_comment + 1; } elsif ($gamechars[$position] =~ /([;%])/) { $end_comment = _find_EOL(\@gamechars, $position); # store comment as above } elsif (($gamechars[$position] =~ /[1-9]/) { $num = _find_chunk (\@gamechars, $position, $REnumber); $movecount++; $position += length($num); } elsif ($gamechars[$position] =~ /([OKQRBNa-h])/) { $move = _find_chunk(\@gamechars, $position, $REmove); $color = $switchcolor{$color}; $position += length($move); } else { $position++; # store into errors unless /\n|\s/; } } sub _find_chunck (\@array $pos $regexp ){ # returns a portion of @array starting from $pos # matching $regexp } sub _find_EOL (\@array $pos){ # returns the nearest EOL from @array starting from $pos # } sub _find_end_symbol (\@array $pos $symbol ){ # returns the position of the closing symbol # "{" => "}", "(" =>")", "[" => "]" # in \@array, starting at $pos } #### while (<>) { chomp; PARSER: { m/ \G( \d+\b )/gcx && do { print "number: $1\n"; redo; }; m/ \G( \w+ )/gcx && do { print "word: $1\n"; redo; }; m/ \G( \s+ )/gcx && do { print "space: $1\n"; redo; }; m/ \G( [^\w\d]+ )/gcx && do { print "other: $1\n"; redo; }; } } #### while (m/$REmove|$REnumber|$REcomment|$REeolcomment/g) { print $1; } #### package PGNParser; # temporary name use FileHandle; use Regexp::Common; sub new { my $class = shift; my $filename = shift || return undef; my $fh = new FileHandle "< $filename"; unless (defined $fh) { return undef } my $self = bless { GameMoves =>[], # game moves GameComments =>{}, # comments with reference to the move gamedescr => {}, # will contain the PGN tags GameErrors => {}, # will contain the parsing errors fh => \$fh # filehandle to the PGN file }, $class; return $self; } sub read_game{ # will read the game from a PGN file # after this, the game text will be in $self->{gamedescr}{Game} } my $REresult = qr{(?:1\-0|0\-1|1\/2\-1\/2|\*)}; my $REmove = qr{[KQRBN]?[a-h]?[1-8]?x?[a-h][1-8](?:\=[QRBN])?}; my $REcastling = qr/O\-O(?:\-O)?/; my $REcheck = qr/(?:(?:\#|\+(\+)?))?/; my $REanymove = qr/(?:$REmove|$REcastling)$REcheck/; my $RENAG = qr/\$\d+/; my $REnumber = qr/\d+\.(?:\.\.)?/; my $REescape = qr/^\%[^\n]*\n/; my $REeolcomment= qr/;.*$/; my $REcomment = $RE{balanced}{-parens=>'{}'}; my $RERAV = $RE{balanced}{-parens=>'()'}; my %switchcolor = ('w' => 'b', 'b' => 'w'); sub parse_game { my $self = shift; return undef unless $self->{gamedescr}{Game}; my $movecount = 0; my $color = 'b'; $self->{gamedescr}{Game} =~ s/$REresult\s*\Z//o; PARSER: { $self->{gamedescr}{Game} =~ m/\G($REnumber)\s*/mgc && do { my $num=$1; if (( $num =~ tr/\.//d) > 1) { $color = 'w'; } if ($movecount == 0) { $movecount = $num; } elsif ($movecount == ($num -1)) { $movecount++; } elsif ($movecount != $num) { $self->{GameErrors}->{$movecount.$color} .= " invalid move sequence ($num <=> $movecount)"; $movecount++; } redo }; $self->{gamedescr}{Game} =~ m/\G($REanymove)\s*/mgc && do { push @{$self->{GameMoves}}, $1; $color = $switchcolor{$color}; redo }; $self->{gamedescr}{Game} =~ m/\G($REcomment|$REeolcomment|$RERAV|$RENAG|$REescape)\s*/mgc && do { $self->{GameComments}->{$movecount.$color} .= " " . $1; $self->{GameComments}->{$movecount.$color} =~ tr/\r//d; $self->{GameComments}->{$movecount.$color} =~ s/\n/ /g; redo }; $self->{gamedescr}{Game} =~ m/\G(\S+\s*)/mgc && do { $self->{GameErrors}->{$movecount.$color} .= " " . $1; $self->{GameErrors}->{$movecount.$color} =~ tr/\r//d; $self->{GameErrors}->{$movecount.$color} =~ s/\n/ /g; redo }; } }