This program reads in a sentence. It then picks out every a^, o^, etc., because some people type those in place of â, ô, etc. when they don't know how to enter the accented characters.
It then changes each one to its \xe2-style equivalent so that a spell checker can check the text and display the correct result.
I've started by changing the â entry, because I had forgotten to use "" instead of ''. Use the â entry as the example.
use strict ;
use CGI qw/:standard/;
use CGI::Carp qw(fatalsToBrowser);
use HTML::Template;
use Data::Dumper;
use Unicode::String;
# CGI request object. Use the direct Class->new form: the indirect-object
# form ("new CGI") is ambiguous to the parser and is disabled in modern Perl.
my $q = CGI->new;
# accentmatch($brawddeg) - replace every "letter + accent marker" shorthand
# in a sentence with the accented character it stands for.
#
# $brawddeg is the sentence to convert. A shorthand is one of the vowels
# a e i o u w y (either case) immediately followed by one of the markers
# ^ (circumflex), " (diaeresis), / (acute) or ` (grave).
#
# Returns the converted sentence; the argument itself is not modified.
# Each matched pair is translated by utf(), which dies on a pair it does
# not know.
#
# No prototype is declared: an empty "()" prototype would tell Perl the sub
# takes no arguments, even though it is always called with one.
sub accentmatch {
    my ($brawddeg) = @_;
    return $brawddeg unless defined $brawddeg;

    # /i so that capital letters (A^, W", ...) are converted too; utf()'s
    # table has entries for both cases.
    $brawddeg =~ s{([aeiouwy]["^`/])}{
        my $pair = $1;
        # utf()'s table keys grave accents under an apostrophe, so hand it
        # the backtick marker in that form.
        $pair =~ tr/`/'/;
        utf($pair);
    }gei;

    return $brawddeg;
}
# utf($acen) - translate one "letter + accent marker" pair into the single
# accented character it stands for.
#
# $acen is a two-character string: one of the vowels a e i o u w y (either
# case) followed by a marker:
#     ^  circumflex      "  diaeresis      /  acute      ' or `  grave
#
# Returns a one-character Perl string holding the Unicode code point, e.g.
# 'a^' gives U+00E2 and 'w^' gives U+0175. This is a character, not encoded
# bytes: encode it as UTF-8 on output (for example with
# binmode(STDOUT, ':encoding(UTF-8)')), otherwise a UTF-8 display receives a
# lone byte for code points below U+0100 and shows it as an undisplayable
# character.
#
# Dies if $acen is not a known pair.
#
# No prototype is declared: an empty "()" prototype would tell Perl the sub
# takes no arguments, even though it is always called with one.
sub utf {
    my ($acen) = @_;

    # Every value is double-quoted with the braced \x{...} escape.
    # Single quotes do not interpret escapes at all, and an unbraced \x
    # takes at most two hex digits ("\x175" is "\x17" followed by "5").
    my %conv = (
        # circumflex
        'a^' => "\x{E2}",  'e^' => "\x{EA}",  'i^' => "\x{EE}",
        'o^' => "\x{F4}",  'u^' => "\x{FB}",  'w^' => "\x{175}",
        'y^' => "\x{177}",
        'A^' => "\x{C2}",  'E^' => "\x{CA}",  'I^' => "\x{CE}",
        'O^' => "\x{D4}",  'U^' => "\x{DB}",  'W^' => "\x{174}",
        'Y^' => "\x{176}",

        # diaeresis
        'a"' => "\x{E4}",  'e"' => "\x{EB}",  'i"' => "\x{EF}",
        'o"' => "\x{F6}",  'u"' => "\x{FC}",  'w"' => "\x{1E85}",
        'y"' => "\x{FF}",
        'A"' => "\x{C4}",  'E"' => "\x{CB}",  'I"' => "\x{CF}",
        'O"' => "\x{D6}",  'U"' => "\x{DC}",  'W"' => "\x{1E84}",
        'Y"' => "\x{178}",

        # acute
        'a/' => "\x{E1}",  'e/' => "\x{E9}",  'i/' => "\x{ED}",
        'o/' => "\x{F3}",  'u/' => "\x{FA}",  'w/' => "\x{1E83}",
        'y/' => "\x{FD}",
        'A/' => "\x{C1}",  'E/' => "\x{C9}",  'I/' => "\x{CD}",
        'O/' => "\x{D3}",  'U/' => "\x{DA}",  'W/' => "\x{1E82}",
        'Y/' => "\x{DD}",

        # grave
        "a'" => "\x{E0}",  "e'" => "\x{E8}",  "i'" => "\x{EC}",
        "o'" => "\x{F2}",  "u'" => "\x{F9}",  "w'" => "\x{1E81}",
        "y'" => "\x{1EF3}",
        "A'" => "\x{C0}",  "E'" => "\x{C8}",  "I'" => "\x{CC}",
        "O'" => "\x{D2}",  "U'" => "\x{D9}",  "W'" => "\x{1E80}",
        "Y'" => "\x{1EF2}",
    );

    # The backtick is the grave marker that accentmatch()'s pattern matches,
    # so accept it as an alias for every apostrophe entry.
    for my $key ( grep { /'\z/ } keys %conv ) {
        ( my $alias = $key ) =~ s/'\z/`/;
        $conv{$alias} = $conv{$key};
    }

    if ( !defined $acen || !exists $conv{$acen} ) {
        my $shown = defined $acen ? "'$acen'" : 'undef';
        die "utf(): no accented character known for $shown";
    }

    return $conv{$acen};
}
But if I enter words like tân, I get t followed by a square thing (the "cannot display this character" symbol).
Any ideas?
Thanks for your help.
P.S. I know I don't need most of the stuff at the top, but please ignore that.