Category: Text Processing
Author/Contact Info
Description: This is a script which finds the abbreviations in the glossings and lists them alphabetically. The abbreviations defined by Leipzig Glossing Rules ( are left out per default. If you want to list the LGR abbreviations, too (and their definition), you should use the "-lgr" suffix.
#abbreviation script (ab)
#Last Updated 05.09.09

use strict;
use utf8;

my $option = "no";
my $length;
my @sorted_abbr;
my @lgr = ("1=first person",
"2=second person",
"3=third person",
"A=agent-like argument of canonical transitive verb",
"N-=non- (e.g. NSG nonsingular, NPST nonpast)",
"NEG=negation, negative",
"P=patient-like argument of canonical transitive verb",
"Q=question particle/marker",
"S=single argument of canonical intransitive verb",

my $filename;
sub first{
print "Enter the name of your TeX file or type 'help' for help: ";
$filename = <>;
if ($filename =~ /help/) {
}; #first ends


sub parse{
#parsing tex
my $dummy = 0;
my @words;
my $check = "no";
my @spaces;
my @abbr;
chomp $filename;
if ( $filename =~ /\s/ ) {
   my @dumb = split(/\s/, $filename);
   $filename = $dumb[0];
   if ($dumb[1] =~ /-lgr/ ) {
      $option = "yes";
open FILE, "<:utf8", $filename or die "No such file";
my @file;
while (<FILE>) {
   push(@file, $_);
close FILE;
$length = scalar(@file);
for (my $i = 0; $i < $length; $i++) {
   if ( $file[$i] =~ m/(\\(gll|[abcdef(exg.)]g\.)|textsc\/)/ ) {
      $dummy = $i + 1;
      while ( $file[$dummy] =~ m/[\-|\s]([A-Z]+)[\s|\-|\:|\.|\=]|[\=|\
+-Z]+)[\s|\-|\:|\.|\=]|(SG|DU|PL)|(1|2|3)|\s([A-Z]+)\s/g ) {
#    print $&."\n";
        my $hell = $&;
        $hell =~ s/-//g;$hell =~ s/\s//g;$hell =~ s/://g;$hell =~ s/\.
+//g;$hell =~ s/=//g;
        for my $line (@abbr){
              $check = "yes" if ($line eq $hell);
#              print $hell.$line.$check."\n";
           push(@abbr, $hell) if ($check =~ "no");
        $check = "no";

@sorted_abbr = sort @abbr;
$length = scalar(@sorted_abbr);
foreach (@sorted_abbr) {
}; #subparse ends

sub lgr{
$length = scalar(@sorted_abbr);
if ( $option =~ /yes/ ) {
   for (my $i = 0; $i < $length;$i++) {
      foreach (@lgr) {
         my @dummy = split(/=/, $_);
         if ( $dummy[0] eq $sorted_abbr[$i] ) {
            $sorted_abbr[$i] = "\\item[$sorted_abbr[$i]] '$dummy[1]'";
}; #lgr ends

sub prin{
foreach (@sorted_abbr) {
   if ( $_ !~ /^\\/ ) {
      $_ = "\\item[$_] ''";
$filename =~ s/\.tex*//;
my $newfilename = $filename."-abb.txt";
open ABB, ">:utf8", $newfilename;
foreach (@sorted_abbr) {
    print ABB $_."\n";
close ABB;
}; #prin ends

sub help{
print "Instructions: 1. Enter the name of the TeX file you want to run
+ the script on.\n2. If you want to find the abbreviations already in 
+the list of 'Leipzig Glossing Rules' automatically, type '-lgr' after
+ the file name.\nExample: 'foo.tex -lgr'.\n3. The list of abbreviatio
+ns will be printed in a file '*-abb.txt'. If the name of the TeX file
+ was 'foo.tex' than the name of the list-file will be 'foo-abb-txt'.\
+n\nNote: The script and the TeX file have to be in the same directory
+.\n\nNote 2: Abbreviation is defined as:\n1. Anything which is writte
+n in uppercase and has more than one letter\n2. Anything written in \
+\textsc{}\n3. Only exceptions to the first two rules are the person/n
+umber abbreviations '1s, 1d, 2s, etc.'\n4. Only the line \\gll, \\ag.
+, \\bg. etc. \\exg. and \\exg are searched for abbreviations\n\nQuest
+ions: email to ozangulle\ .\n";
}; #help ends