#! /usr/bin/perl
use strict;
use warnings;
use Argdom;
# Run-time settings shared by the subs below. setGlobalVars overwrites all
# three entries from the parsed command line:
#   conf  - what the argument parser stored under the '-conf' key
#   lang  - what the argument parser stored under the '-lang' key
#   paths - the list returned by the parser's getArbs method
# extractRelations reads element [0] of conf and lang, so those are
# presumably array refs once set -- confirm against Argdom.pm.
my $globalVars = {conf => '', lang => '', paths => []};
# Forward declarations. They let the subs be called without parentheses
# (e.g. `setGlobalVars $ad;') ahead of their definitions further down.
sub extractRelations;
sub keyChooser;
sub isOneOf;
sub singleChooser;
sub setGlobalVars;
sub isValidLine;
sub splitTwo;
sub replaceNotations;
sub replaceSafely;
sub printHelp;
# Replacement characters are non-ASCII, so STDOUT is switched to UTF-8 up front.
binmode STDOUT, ':utf8'; # Especially during debugging
# To make the script more organised ...
#
# lesmetsMain ARGS
#   ARGS - the command-line arguments (normally @ARGV).
# Parses ARGS with Argdom, prints the help text when -h or --help is among
# the arguments returned by getArbs, otherwise loads the replacement rules
# and processes the inputs. Returns the process exit status (0), or whatever
# printHelp returns.
sub lesmetsMain
{
    my $relations = {};
    # Direct method call instead of the indirect-object form `new Argdom ...',
    # which Perl can misparse when a sub called `new' is in scope.
    my $ad = Argdom->new(\@_);
    $ad->setKeyChooser(\&keyChooser);
    $ad->setSingleChooser(\&singleChooser);
    my $arbs = $ad->getArbs;
    return printHelp() if isOneOf('--help', $arbs) or isOneOf('-h', $arbs);
    setGlobalVars($ad);
    extractRelations($relations);
    replaceNotations($relations);
    print "\n";
    0;
}
exit lesmetsMain(@ARGV);
# extractRelations HASHREF
#   HASHREF - hash to fill; each rule's left column becomes a key, its right
#             column the value.
# Picks the config file: the -conf value when one was given, otherwise
# $HOME/.lesmets.<lang>, with <lang> falling back to 'def'. Every line that
# isValidLine accepts is split with splitTwo and stored in HASHREF.
# Dies when the file cannot be opened.
sub extractRelations
{
    my ($toWhere) = @_;
    my $conf = $globalVars->{'conf'};
    my $lang = $globalVars->{'lang'};

    # The settings normally arrive as array refs (first element is used);
    # a plain string is accepted as well rather than dying on the dereference.
    my $confFile = ref($conf) eq 'ARRAY' ? $conf->[0] : $conf;
    unless ($confFile)
    {
        my $code = ref($lang) eq 'ARRAY' ? $lang->[0] : $lang;
        $code = 'def' unless $code;
        my $home = defined $ENV{'HOME'} ? $ENV{'HOME'} : '';
        $confFile = $home . '/.lesmets.' . $code;
    }

    # Lexical handle and a named line variable: nothing global is clobbered.
    open my $confHandle, '<:utf8', $confFile or die "Could not open $confFile: $!";
    while (defined(my $line = <$confHandle>))
    {
        chomp $line;
        next unless isValidLine($line);    # blank line or comment
        my ($cue, $replacement) = splitTwo($line);
        $toWhere->{$cue} = $replacement;
    }
    close $confHandle;
}
# keyChooser ARG
# Callback registered through setKeyChooser in lesmetsMain. Answers what
# isOneOf answers: 1 when ARG is '-conf' or '-lang', 0 otherwise.
# Presumably Argdom uses it to spot options that take a value -- confirm
# against Argdom.pm.
sub keyChooser
{
    my ($candidate) = @_;
    my @keyOptions = ('-conf', '-lang');
    return isOneOf($candidate, \@keyOptions);
}
# isOneOf ITEM, ARRAYREF
#   ITEM     - string to look for.
#   ARRAYREF - reference to the array of candidate strings.
# Returns 1 when ITEM is string-equal to an element of ARRAYREF, else 0.
# Dies when fewer than two parameters are given or when the second one is
# not an array reference.
sub isOneOf
{
    die 'Insufficient parameters to isOneOf sub' unless @_ > 1;
    die 'Parameter two to isOneOf should be an array ref' unless ref($_[1]) eq 'ARRAY';
    my ($item, $candidates) = @_;
    foreach my $candidate (@{$candidates})
    {
        return 1 if $candidate eq $item;
    }
    0;
}
# singleChooser ARG
# Callback registered through setSingleChooser in lesmetsMain. It answers 0
# for every argument. Presumably that tells Argdom no option is a
# stand-alone flag -- confirm against Argdom.pm.
sub singleChooser
{
    return 0;
}
# setGlobalVars ARGDOM
#   ARGDOM - the Argdom object that parsed the command line.
# Copies the parsed settings into $globalVars: the '-conf' and '-lang'
# entries of getKeys, and the list returned by getArbs as 'paths'.
# Dies when called without a parameter or with something that is not an
# Argdom object (subclasses are accepted).
sub setGlobalVars
{
    die 'setGlobalVars takes one parameter' unless @_;
    my ($ad) = @_;
    # isa() dies on an unblessed reference, hence the eval guard.
    die 'Parameter one to setGlobalVars must be an Argdom object'
        unless ref $ad and eval { $ad->isa('Argdom') };
    my %ks = %{$ad->getKeys};
    $globalVars->{'conf'}  = $ks{'-conf'};
    $globalVars->{'lang'}  = $ks{'-lang'};
    $globalVars->{'paths'} = $ad->getArbs;
}
# Uses things that are less efficient than regular expressions, but
# more intelligible -- to me, at least.
#
# isValidLine LINE
#   LINE - one config-file line, already chomped.
# Returns 1 when LINE carries a rule. Returns 0 when LINE is a comment (its
# first character other than space or tab is '#') or when it holds nothing
# but spaces and tabs (including the empty string).
sub isValidLine
{
    my ($line) = @_;
    return 0 unless defined $line;
    foreach my $pos (0 .. length($line) - 1)
    {
        my $char = substr $line, $pos, 1;
        next if $char eq ' ' or $char eq "\t";    # still inside leading blanks
        # The first real character decides: '#' starts a comment.
        return $char eq '#' ? 0 : 1;
    }
    0;    # ran out of characters without meeting a non-blank one
}
# splitTwo LINE
#   LINE - one rule line from the config file.
# Returns a two-element list (LEFT, RIGHT). Leading spaces and tabs are
# skipped, LEFT runs up to the first space or tab after that, and RIGHT is
# everything behind that single separator character (further blanks stay
# part of RIGHT). A line without a separator yields (LINE, ''), i.e. a rule
# that replaces its cue with nothing.
sub splitTwo
{
    my ($line) = @_;
    $line = '' unless defined $line;

    # isValidLine lets indented rules through, so skip the indentation here;
    # otherwise the cue would come out as the empty string.
    my $start = 0;
    ++$start while $start < length($line)
        and index(" \t", substr($line, $start, 1)) != -1;

    # The separator is whichever of space or tab comes first.
    my $cut;
    foreach my $separator (' ', "\t")
    {
        my $pos = index $line, $separator, $start;
        next if $pos == -1;
        $cut = $pos if not defined $cut or $pos < $cut;
    }

    # No separator at all: index() gave -1, which must not reach substr.
    return (substr($line, $start), '') unless defined $cut;
    return (substr($line, $start, $cut - $start), substr($line, $cut + 1));
}
# replaceNotations HASHREF
#   HASHREF - the replacement rules: cue => replacement.
# Processes every path in $globalVars->{'paths'}, or STDIN when that list is
# empty. The path '-' means: read STDIN, write STDOUT. Any other path is read
# as UTF-8 and the result is written to "<path>.out". Each input line is run
# through replaceSafely once per rule.
# Dies when a file cannot be opened or the output cannot be flushed.
sub replaceNotations
{
    my ($rels) = @_;
    my $paths = $globalVars->{'paths'};
    # A reference is always true, so the element count is what tells us
    # whether any path was given.
    $paths = ['-'] unless $paths and @{$paths};

    # Fixed rule order (longest cue first) instead of random hash order, so
    # overlapping cues give the same output on every run. Empty cues cannot
    # match anything sensible and are skipped.
    my @cues = sort { length $b <=> length $a or $a cmp $b }
               grep { length } keys %{$rels};

    foreach my $path (@{$paths})
    {
        # Decided per path, so files listed after '-' are still read as files.
        my $interactive = $path eq '-';
        my ($in, $out);
        if ($interactive)
        {
            binmode STDOUT, ':utf8';
            binmode STDIN, ':utf8';
            ($in, $out) = (\*STDIN, \*STDOUT);
        }
        else
        {
            open $in, '<:utf8', $path or die "Could not open $path for reading: $!";
            open $out, '>:utf8', $path . '.out' or die "Could not open $path.out for writing: $!";
        }

        # defined(), not truth: a final line holding just "0" must not end the loop.
        while (defined(my $line = <$in>))
        {
            foreach my $cue (@cues)
            {
                $line = replaceSafely($line, $cue, $rels->{$cue});
            }
            print {$out} $line;
        }

        unless ($interactive)
        {
            # Buffered write errors only show up when the handle is closed.
            close $out or die "Could not finish writing $path.out: $!";
            close $in;
        }
    }
}
# replaceSafely TEXT, CUE, REPLACEMENT
#   TEXT        - the string to work on.
#   CUE         - literal substring to look for (no pattern syntax).
#   REPLACEMENT - literal text put in place of each CUE.
# Returns a copy of TEXT with every occurrence of CUE replaced, scanning left
# to right. Inserted text is not scanned again. An empty or undefined CUE
# leaves TEXT unchanged; an undefined REPLACEMENT counts as ''.
sub replaceSafely
{
    my ($cur, $cue, $rep) = @_;
    $cur = '' unless defined $cur;
    $rep = '' unless defined $rep;
    # index() finds an empty string at every position, which would keep the
    # loop below from ever advancing.
    return $cur unless defined $cue and length $cue;

    my ($rez, $at, $cueLength) = ('', 0, length $cue);
    while ((my $pos = index $cur, $cue, $at) != -1)
    {
        $rez .= substr($cur, $at, $pos - $at) . $rep;
        $at = $pos + $cueLength;
    }
    $rez . substr($cur, $at);
}
# printHelp
# Prints the usage text to the currently selected output handle, with $0
# interpolated as the program name. Returns 0 so the result can be used as
# the exit status.
sub printHelp
{
    print <<"END_HELP";
Usage:
    $0 [-h | --help] [-lang xy] [-conf conf_path] [file [file [...]]] [-]

    -h | --help      Prints this help message and quits.
    -lang xy         xy should be a language code (depends on you). If it is
                     provided, the config file that is used to guide $0
                     will have to be ~/.lesmets.xy.
                     If you invoke thus: $0 -lang fr
                     then the config file will have to be ~/.lesmets.fr
    -conf conf_path  Sets the config file's path. If this is provided, the
                     -lang value is not used as explained above.
                     The default config file should be at ~/.lesmets.def
                     (like passing -lang def).
    file             These are the files to process. They are treated one by
                     one, in that order.
                     A file is written out for each, with the same name, but
                     with a `.out' added to the end. This is the file that
                     contains the edited data.
    -                That tells the program to get input via STDIN, and print
                     to STDOUT. It is considered, in the internal logic, as
                     the path to STDIN, and `-.out' as the path to STDOUT.

Information on how to write the config file is in the POD documentation,
which you can get out by doing this:
    pod2html --outfile lesmets.html lesmets.pl
and then reading the lesmets.html file in a Unicode-good browser. Try:
    w3m lesmets.html
END_HELP
    0;
}
__DATA__
=pod
=head1 NAME
lesmets.pl - Accents-on-my-ASCII script
=head1 SYNOPSIS
Sample usage:
lesmets.pl -
Franc>ais: Les boeufs n'ont pas mange/ les cadeaux de Caesar.
Français: Les bœufs n'ont pas mangé les cadeaux de Cæsar.
That run used the default config file, which is written out in here.
=head1 DESCRIPTION
Helps you put accents on characters that should have them (for the Romance
languages, for example), with a keyboard that doesn't have those characters.
That calls for writing with the available chars to indicate where you want
accents to be. It goes beyond putting accents, but that's what I use it for.
It can change stuff from any format to any format. All you have to do is put
it all in a config file.
=head1 USAGE
To get information about how to use it (and it is very simple), run:
lesmets.pl --help
You can use your shell's rc (C<.bashrc>, usually) to make it easier to use. This line is in my C<.bashrc>:
alias lesmets='lesmets.pl'
So that I don't have to remember to put the C<.pl> extension. Also, I have it in one of my PATH directories.
Makes it all much easier.
Don't forget to read through again, after processing. Some character sequences, that you may have meant in good faith, may
have been understood to be codes for the program. B< I<You should read through.> >
The code requires the C<Argdom.pm> module, which should also be available where you got this file.
=head1 CONFIGURATION FILES
=head2 Sample Config File
# This is the default config file for lesmets. It is good enough.
# You can save it at ~/.lesmets.def
# What you should put there, for ease, should be the config file you use most. This happens to be the one,
# for me.
# By the way, I don't remember where I got this idea, but I must have been reading about one of those
# made-for-computer cross-language dictionaries. I'll put the credits here when I find it again.
# And I got the accented characters from Open Office (Menu: Insert -> Special Character)
A/ Á
A\ À
A^ Â
A: Ä
AE Æ
C> Ç
E\ È
E/ É
E^ Ê
E: Ë
I\ Ì
I/ Í
I^ Î
I: Ï
O\ Ò
O/ Ó
O^ Ô
O: Ö
U\ Ù
U/ Ú
U^ Û
U: Ü
Y/ Ý
a\ à
a/ á
a^ â
a: ä
ae æ
c> ç
e\ è
e/ é
e^ ê
e: ë
i\ ì
i/ í
i^ î
i: ï
o\ ò
o/ ó
o^ ô
o: ö
u\ ù
u/ ú
u^ û
u: ü
oe œ
=head2 How to Write a Config File
Any line whose first non-whitespace character is a hash (#) is a comment, and is skipped over. Same for lines with no
non-whitespace characters.
A # does not always show the beginning of a comment. It only does that if it is the first non-whitespace char in the
line. In other cases, it is part of the codes.
So, the config file codes are two columns of characters. The columns are separated by a single space only. Tabs work,
but don't use them. The Left is the series of characters that you will type out. They are usually what you have on your
keyboard. The Right is the sequence (usually one character) that will replace the Left.
So, with that config file, you have all occurrences of o: being replaced with o with a diaeresis at the top.
Because all these sequences may occur without your intention to make them codes, you should read through, after processing
the file. I didn't feel like putting logic to skip over some bits, because it is too rare a case to make me add 1000 lines
for. Also, I'm not planning to use this to write a book E<8212> just a bit of correspondence. Same should apply to you.
And, if you really want that, you can add it. See the L<"COPYRIGHT"> section.
=head1 EXTENDING NAUTILUS
You can use this script to extend Nautilus, the GNOME File manager. It is very nifty. You will only have to right-click a file icon, and then go to Scripts on the menu that pops up, and click lesmets.pl. The processed file will show up in the same folder.
Copy lesmets.pl into Nautilus' script directory. It is usually ~/.gnome2/nautilus-scripts. Then, visit that directory with Nautilus, so it can know there is a script there (if it does not show ``Scripts'' when you right click a file).
Now, Nautilus is ready to use lesmets.pl! That easy!
=head1 CREDITS
I was checking for an English-French dictionary for my phone, and I stumbled on some project to create a translation dictionary for computers. I think that is where I checked and found that they were representing the characters that had accents on them the way I have done it in that config file. All I remember is the stuff of the / and \ accents. The rest, I had to get adventurous.
=head1 TO-DO
=over
=item 0. Add a GTK+ front-end.
This should be so simple as to discourage me from trying it.
=item 1. Find out how to extend Konq (I don't use KDE as of now), and add it in here.
=back
=head1 BUGS
None that I know of. But one feature is missing: indication of a section of the source file to skip over.
=head1 AUTHOR
Revence XXVII <revence27@praize.com>
=head1 COPYRIGHT
No copyright, no licence.
This code, algorithms and all the ideas pertaining to this intellectual property, I hereby place in the Public Domain.
=cut
|