# there's some looseness here to account for people's mangling # of the English language entire_input ::= numbers 'End_Of_Input' numbers ::= [ NEGATIVE ] millions | thousands | hundreds | tens | teens | ones [ 'POINT' teens | ones { ones } ] millions ::= [ hundreds | teens | tens | ones ] 'MILLION' [ thousands | hundreds | teens | tens | ones ] thousands ::= [ hundreds | teens | tens | ones ] 'THOUSAND' [ hundreds | teens | tens | ones ] hundreds ::= [ teens | tens | ones ] 'HUNDRED' [ teens | tens | ones ] tens ::= twenty | thirty | forty | fifty | sixty | seventy | eighty | ninety teens ::= ten | eleven | twelve | thirteen | fourteen | fifteen | sixteen | seventeen | eighteen | nineteen ones ::= zero | one | two | three | four | five | six | seven | eight | nine ##```## package Number::FromWord; use warnings; use strict; use HOP::Stream qw/iterator_to_stream/; use HOP::Lexer qw/make_lexer/; use HOP::Parser qw/:all/; use Regexp::Assemble; use Scalar::Util qw/looks_like_number/; use base 'Exporter'; our @EXPORT_OK = 'word_to_num'; my %one_num_for = ( zero => 0, one => 1, two => 2, three => 3, four => 4, five => 5, six => 6, seven => 7, eight => 8, nine => 9, ); my %teen_num_for = ( ten => 10, eleven => 11, twelve => 12, thirteen => 13, fourteen => 14, fifteen => 15, sixteen => 16, seventeen => 17, eighteen => 18, nineteen => 19, ); my %ten_num_for = ( twenty => 20, thirty => 30, forty => 40, fifty => 50, sixty => 60, seventy => 70, eighty => 80, ninety => 90, ); # # set up the lexer # my \$ones_re = Regexp::Assemble->new; foreach my \$number ( keys %one_num_for ) { \$ones_re->add(\$number); } my \$teens_re = Regexp::Assemble->new; foreach my \$number ( keys %teen_num_for ) { \$teens_re->add(\$number); } my \$tens_re = Regexp::Assemble->new; foreach my \$number ( keys %ten_num_for ) { \$tens_re->add(\$number); } my @tokens = ( [ 'AND', qr/\band\b/i, sub { } ], [ 'MILLION', qr/million/i, \&number ], [ 'THOUSAND', qr/thousand/i, \&number ], [ 'HUNDRED', qr/hundred/i, \&number ], [ 'TENS', qr/\$tens_re+/i, \&number ], [ 'TEENS', qr/\$teens_re+/i, \&number ], [ 'ONES', qr/\$ones_re+/i, \&number ], [ 'NEGATIVE', qr/negative/i ], [ 'POINT', qr/point/i ], [ 'SPACE', qr/[ -]/i, sub { } ], ); sub number { my ( \$label, \$number_as_word ) = @_; return [ \$label, lc \$number_as_word ]; } sub word_to_num { my @word = shift; my \$lexer = make_lexer( sub { shift @word }, @tokens ); return parse(\$lexer); } # # set up the parser # my ( \$numbers, \$negative, \$point, \$ones, \$teens, \$tens, \$hundreds, \$thousands, \$millions ); my \$Point = parser { \$point->(@_) }; my \$Ones = parser { \$ones->(@_) }; my \$Teens = parser { \$teens->(@_) }; my \$Tens = parser { \$tens->(@_) }; my \$Hundreds = parser { \$hundreds->(@_) }; my \$Thousands = parser { \$thousands->(@_) }; my \$Millions = parser { \$millions->(@_) }; my \$Negative = parser { \$negative->(@_) }; my \$Numbers = parser { \$numbers->(@_) }; # entire_input ::= numbers 'End_Of_Input' my \$entire_input = T( concatenate( \$Numbers, \&End_of_Input ), sub { shift } ); # numbers ::= [ NEGATIVE ] millions | thousands | hundreds | tens | teens | ones # [ 'POINT' teens | ones { ones } ] \$numbers = T( concatenate( optional( lookfor('NEGATIVE') ), alternate( \$Millions, \$Thousands, \$Hundreds, \$Tens, \$Teens, \$Ones ), optional( concatenate( absorb( lookfor('POINT') ), alternate( \$Teens, concatenate( \$Ones, star(\$Ones) ) ) ) ), ), sub { my ( \$neg, \$num, \$point ) = @_; \$point = \$point->[0]; if ( looks_like_number(\$point) ) { # we have teens \$num .= ".\$point"; } elsif ( defined \$point->[0] ) { # we have ones my \$decimal = \$point->[0]; if ( my @points = @{ \$point->[1] } ) { \$decimal .= join '', @points; } \$num .= ".\$decimal"; } if (@\$neg) { \$num *= -1; } return \$num; } ); # millions ::= [ hundreds | teens | tens | ones ] # 'MILLION' # [ thousands | hundreds | teens | tens | ones ] \$millions = T( concatenate( optional( alternate( \$Hundreds, \$Teens, \$Tens, \$Ones ) ), absorb( lookfor('MILLION') ), optional( alternate( \$Thousands, \$Hundreds, \$Teens, \$Tens, \$Ones ) ) ), sub { my ( \$million, \$thousands ) = @_; \$million = \$million->[0] || 1; \$thousands = \$thousands->[0] || 0; return \$million . sprintf "%06d", \$thousands; } ); # thousands ::= [ hundreds | teens | tens | ones ] # 'THOUSAND' # [ hundreds | teens | tens | ones ] \$thousands = T( concatenate( optional( alternate( \$Hundreds, \$Teens, \$Tens, \$Ones ) ), absorb( lookfor('THOUSAND') ), optional( alternate( \$Hundreds, \$Teens, \$Tens, \$Ones ) ) ), sub { my ( \$thousand, \$hundreds ) = @_; \$thousand = \$thousand->[0] || 1; \$hundreds = \$hundreds->[0] || 0; return \$thousand . sprintf "%03d", \$hundreds; } ); # hundreds ::= [ teens | tens | ones ] 'HUNDRED' [ teens | tens | ones ] \$hundreds = T( concatenate( optional( alternate( \$Teens, \$Tens, \$Ones ) ), absorb( lookfor('HUNDRED') ), optional( alternate( \$Teens, \$Tens, \$Ones ) ) ), sub { my ( \$hundred, \$tens ) = @_; \$hundred = \$hundred->[0] || 1; \$tens = \$tens->[0] || 0; return \$hundred . sprintf "%02d", \$tens; } ); # tens ::= 'TENS' [ ones ] \$tens = T( concatenate( lookfor('TENS'), optional(\$Ones) ), sub { my ( \$ten_word, \$one_num ) = @_; my \$ten_num = \$ten_num_for{\$ten_word}; \$ten_num += \$one_num->[0] || 0; return \$ten_num; } ); # teens ::= 'TEENS' \$teens = T( lookfor('TEENS'), sub { \$teen_num_for{ \$_[0] } } ); # ones ::= 'ONES' \$ones = T( lookfor('ONES'), sub { \$one_num_for{ \$_[0] } } ); sub parse { my \$stream = iterator_to_stream(shift); my ( \$results, \$remainder ) = eval { \$entire_input->(\$stream) }; return \$results; } 1; ##``````## #!perl #use Test::More tests => 1; use Test::More qw/no_plan/; BEGIN { chdir 't' if -d 't'; unshift @INC, '../lib'; use_ok( 'Number::FromWord', 'word_to_num' ); } can_ok __PACKAGE__, 'word_to_num'; my @words = ( 'three' => 3, 'fourteen' => 14, 'forty' => 40, 'forty-two' => 42, 'ninety nine' => 99, 'hundred' => 100, 'hundred and twenty' => 120, 'hundred and two' => 102, 'negative hundred and two' => -102, 'hundred ninety-nine' => 199, 'hundred nineteen' => 119, 'two hundred and three' => 203, 'twelve hundred and seventy three', => 1273, 'thousand' => 1000, 'seven hundred thirty-three thousand five hundred and twenty-nine', 733529, 'seventy three point two four five' => 73.245, 'three point seventeen' => 3.17, 'seven million' => 7000000, 'two hundred and ninety six million four hundred and twenty-two thousand five hundred and seventy eight', 296_422_578, 'two hundred and ninety six million four hundred and twenty-two thousand five hundred and seventy eight point three four seven', 296422578.347, ); while (@words) { my ( \$word, \$num ) = splice @words, 0, 2; is word_to_num(\$word), \$num, "... \$word should be \$num"; } ```