# there's some looseness here to account for people's mangling
# of the English language
entire_input ::= numbers 'End_Of_Input'
numbers ::= [ NEGATIVE ]
millions | thousands | hundreds | tens | teens | ones
[ 'POINT' teens | ones { ones } ]
millions ::= [ hundreds | teens | tens | ones ]
'MILLION'
[ thousands | hundreds | teens | tens | ones ]
thousands ::= [ hundreds | teens | tens | ones ]
'THOUSAND'
[ hundreds | teens | tens | ones ]
hundreds ::= [ teens | tens | ones ] 'HUNDRED' [ teens | tens | ones ]
tens ::= twenty | thirty | forty | fifty | sixty
| seventy | eighty | ninety
teens ::= ten | eleven | twelve | thirteen | fourteen
| fifteen | sixteen | seventeen | eighteen | nineteen
ones ::= zero | one | two | three | four
| five | six | seven | eight | nine
####
package Number::FromWord;
use warnings;
use strict;
use HOP::Stream qw/iterator_to_stream/;
use HOP::Lexer qw/make_lexer/;
use HOP::Parser qw/:all/;
use Regexp::Assemble;
use Scalar::Util qw/looks_like_number/;
use base 'Exporter';
our @EXPORT_OK = 'word_to_num';
my %one_num_for = (
zero => 0,
one => 1,
two => 2,
three => 3,
four => 4,
five => 5,
six => 6,
seven => 7,
eight => 8,
nine => 9,
);
my %teen_num_for = (
ten => 10,
eleven => 11,
twelve => 12,
thirteen => 13,
fourteen => 14,
fifteen => 15,
sixteen => 16,
seventeen => 17,
eighteen => 18,
nineteen => 19,
);
my %ten_num_for = (
twenty => 20,
thirty => 30,
forty => 40,
fifty => 50,
sixty => 60,
seventy => 70,
eighty => 80,
ninety => 90,
);
#
# set up the lexer
#
my $ones_re = Regexp::Assemble->new;
foreach my $number ( keys %one_num_for ) {
$ones_re->add($number);
}
my $teens_re = Regexp::Assemble->new;
foreach my $number ( keys %teen_num_for ) {
$teens_re->add($number);
}
my $tens_re = Regexp::Assemble->new;
foreach my $number ( keys %ten_num_for ) {
$tens_re->add($number);
}
my @tokens = (
[ 'AND', qr/\band\b/i, sub { } ],
[ 'MILLION', qr/million/i, \&number ],
[ 'THOUSAND', qr/thousand/i, \&number ],
[ 'HUNDRED', qr/hundred/i, \&number ],
[ 'TENS', qr/$tens_re+/i, \&number ],
[ 'TEENS', qr/$teens_re+/i, \&number ],
[ 'ONES', qr/$ones_re+/i, \&number ],
[ 'NEGATIVE', qr/negative/i ],
[ 'POINT', qr/point/i ],
[ 'SPACE', qr/[ -]/i, sub { } ],
);
sub number {
my ( $label, $number_as_word ) = @_;
return [ $label, lc $number_as_word ];
}
sub word_to_num {
my @word = shift;
my $lexer = make_lexer( sub { shift @word }, @tokens );
return parse($lexer);
}
#
# set up the parser
#
my (
$numbers, $negative, $point, $ones, $teens,
$tens, $hundreds, $thousands, $millions
);
my $Point = parser { $point->(@_) };
my $Ones = parser { $ones->(@_) };
my $Teens = parser { $teens->(@_) };
my $Tens = parser { $tens->(@_) };
my $Hundreds = parser { $hundreds->(@_) };
my $Thousands = parser { $thousands->(@_) };
my $Millions = parser { $millions->(@_) };
my $Negative = parser { $negative->(@_) };
my $Numbers = parser { $numbers->(@_) };
# entire_input ::= numbers 'End_Of_Input'
my $entire_input = T( concatenate( $Numbers, \&End_of_Input ), sub { shift } );
# numbers ::= [ NEGATIVE ] millions | thousands | hundreds | tens | teens | ones
# [ 'POINT' teens | ones { ones } ]
$numbers = T(
concatenate(
optional( lookfor('NEGATIVE') ),
alternate( $Millions, $Thousands, $Hundreds, $Tens, $Teens, $Ones ),
optional(
concatenate(
absorb( lookfor('POINT') ),
alternate( $Teens, concatenate( $Ones, star($Ones) ) )
)
),
),
sub {
my ( $neg, $num, $point ) = @_;
$point = $point->[0];
if ( looks_like_number($point) ) { # we have teens
$num .= ".$point";
}
elsif ( defined $point->[0] ) { # we have ones
my $decimal = $point->[0];
if ( my @points = @{ $point->[1] } ) {
$decimal .= join '', @points;
}
$num .= ".$decimal";
}
if (@$neg) {
$num *= -1;
}
return $num;
}
);
# millions ::= [ hundreds | teens | tens | ones ]
# 'MILLION'
# [ thousands | hundreds | teens | tens | ones ]
$millions = T(
concatenate(
optional( alternate( $Hundreds, $Teens, $Tens, $Ones ) ),
absorb( lookfor('MILLION') ),
optional( alternate( $Thousands, $Hundreds, $Teens, $Tens, $Ones ) )
),
sub {
my ( $million, $thousands ) = @_;
$million = $million->[0] || 1;
$thousands = $thousands->[0] || 0;
return $million . sprintf "%06d", $thousands;
}
);
# thousands ::= [ hundreds | teens | tens | ones ]
# 'THOUSAND'
# [ hundreds | teens | tens | ones ]
$thousands = T(
concatenate(
optional( alternate( $Hundreds, $Teens, $Tens, $Ones ) ),
absorb( lookfor('THOUSAND') ),
optional( alternate( $Hundreds, $Teens, $Tens, $Ones ) )
),
sub {
my ( $thousand, $hundreds ) = @_;
$thousand = $thousand->[0] || 1;
$hundreds = $hundreds->[0] || 0;
return $thousand . sprintf "%03d", $hundreds;
}
);
# hundreds ::= [ teens | tens | ones ] 'HUNDRED' [ teens | tens | ones ]
$hundreds = T(
concatenate(
optional( alternate( $Teens, $Tens, $Ones ) ),
absorb( lookfor('HUNDRED') ),
optional( alternate( $Teens, $Tens, $Ones ) )
),
sub {
my ( $hundred, $tens ) = @_;
$hundred = $hundred->[0] || 1;
$tens = $tens->[0] || 0;
return $hundred . sprintf "%02d", $tens;
}
);
# tens ::= 'TENS' [ ones ]
$tens = T(
concatenate( lookfor('TENS'), optional($Ones) ),
sub {
my ( $ten_word, $one_num ) = @_;
my $ten_num = $ten_num_for{$ten_word};
$ten_num += $one_num->[0] || 0;
return $ten_num;
}
);
# teens ::= 'TEENS'
$teens = T( lookfor('TEENS'), sub { $teen_num_for{ $_[0] } } );
# ones ::= 'ONES'
$ones = T( lookfor('ONES'), sub { $one_num_for{ $_[0] } } );
sub parse {
my $stream = iterator_to_stream(shift);
my ( $results, $remainder ) = eval { $entire_input->($stream) };
return $results;
}
1;
##
##
#!perl
#use Test::More tests => 1;
use Test::More qw/no_plan/;
BEGIN {
chdir 't' if -d 't';
unshift @INC, '../lib';
use_ok( 'Number::FromWord', 'word_to_num' );
}
can_ok __PACKAGE__, 'word_to_num';
my @words = (
'three' => 3,
'fourteen' => 14,
'forty' => 40,
'forty-two' => 42,
'ninety nine' => 99,
'hundred' => 100,
'hundred and twenty' => 120,
'hundred and two' => 102,
'negative hundred and two' => -102,
'hundred ninety-nine' => 199,
'hundred nineteen' => 119,
'two hundred and three' => 203,
'twelve hundred and seventy three', => 1273,
'thousand' => 1000,
'seven hundred thirty-three thousand five hundred and twenty-nine',
733529,
'seventy three point two four five' => 73.245,
'three point seventeen' => 3.17,
'seven million' => 7000000,
'two hundred and ninety six million four hundred and twenty-two thousand five hundred and seventy eight',
296_422_578,
'two hundred and ninety six million four hundred and twenty-two thousand five hundred and seventy eight point three four seven',
296422578.347,
);
while (@words) {
my ( $word, $num ) = splice @words, 0, 2;
is word_to_num($word), $num, "... $word should be $num";
}