http://qs321.pair.com?node_id=777633
Category: Text Processing
Author/Contact Info
Description: This is a script which finds the abbreviations in the glossings and lists them alphabetically. The abbreviations defined by Leipzig Glossing Rules (http://www.eva.mpg.de/lingua/resources/glossing-rules.php) are left out per default. If you want to list the LGR abbreviations, too (and their definition), you should use the "-lgr" suffix.
#!/usr/bin/perl
#abbreviation script (ab)
#Last Updated 05.09.09

use strict;
use utf8;

my $option = "no";
my $length;
my @sorted_abbr;
my @lgr = ("1=first person",
"2=second person",
"3=third person",
"A=agent-like argument of canonical transitive verb",
"ABL=ablative",
"ABS=absolutive",
"ACC=accusative",
"ADJ=adjective",
"ADV=adverb(ial)",
"AGR=agreement",
"ALL=allative",
"ANTIP=antipassive",
"APPL=applicative",
"ART=article",
"AUX=auxiliary",
"BEN=benefactive",
"CAUS=causative",
"CLF=classifier",
"COM=comitative",
"COMP=complementizer",
"COMPL=completive",
"COND=conditional",
"COP=copula",
"CVB=converb",
"DAT=dative",
"DECL=declarative",
"DEF=definite",
"DEM=demonstrative",
"DET=determiner",
"DIST=distal",
"DISTR=distributive",
"DU=dual",
"DUR=durative",
"ERG=ergative",
"EXCL=exclusive",
"F=feminine",
"FOC=focus",
"FUT=future",
"GEN=genitive",
"IMP=imperative",
"INCL=inclusive",
"IND=indicative",
"INDF=indefinite",
"INF=infinitive",
"INS=instrumental",
"INTR=intransitive",
"IPFV=imperfective",
"IRR=irrealis",
"LOC=locative",
"M=masculine",
"N=neuter",
"N-=non- (e.g. NSG nonsingular, NPST nonpast)",
"NEG=negation, negative",
"NMLZ=nominalizer/nominalization",
"NOM=nominative",
"OBJ=object",
"OBL=oblique",
"P=patient-like argument of canonical transitive verb",
"PASS=passive",
"PFV=perfective",
"PL=plural",
"POSS=possessive",
"PRED=predicative",
"PRF=perfect",
"PRS=present",
"PROG=progressive",
"PROH=prohibitive",
"PROX=proximal/proximate",
"PST=past",
"PTCP=participle",
"PURP=purposive",
"Q=question particle/marker",
"QUOT=quotative",
"RECP=reciprocal",
"REFL=reflexive",
"REL=relative",
"RES=resultative",
"S=single argument of canonical intransitive verb",
"SBJ=subject",
"SBJV=subjunctive",
"SG=singular",
"TOP=topic",
"TR=transitive",
"VOC=vocative");

my $filename;
sub first{
print "Enter the name of your TeX file or type 'help' for help: ";
$filename = <>;
if ($filename =~ /help/) {
   help();
}else{
   parse();
};
}; #first ends

first();
#parse();
lgr();
prin();

sub parse{
#parsing tex
my $dummy = 0;
my @words;
my $check = "no";
my @spaces;
my @abbr;
chomp $filename;
if ( $filename =~ /\s/ ) {
   my @dumb = split(/\s/, $filename);
   $filename = $dumb[0];
   if ($dumb[1] =~ /-lgr/ ) {
      $option = "yes";
   };
};
open FILE, "<:utf8", $filename or die "No such file";
my @file;
while (<FILE>) {
   chomp;
   push(@file, $_);
};
close FILE;
$length = scalar(@file);
for (my $i = 0; $i < $length; $i++) {
   if ( $file[$i] =~ m/(\\(gll|[abcdef(exg.)]g\.)|textsc\/)/ ) {
      $dummy = $i + 1;
      while ( $file[$dummy] =~ m/[\-|\s]([A-Z]+)[\s|\-|\:|\.|\=]|[\=|\
+s]([A-Z]+)[\s|\-|\:|\.|\=]|[\.|\s]([A-Z]+)[\s|\-|\:|\.|\=]|[\:|\s]([A
+-Z]+)[\s|\-|\:|\.|\=]|(SG|DU|PL)|(1|2|3)|\s([A-Z]+)\s/g ) {
#    print $&."\n";
        my $hell = $&;
        $hell =~ s/-//g;$hell =~ s/\s//g;$hell =~ s/://g;$hell =~ s/\.
+//g;$hell =~ s/=//g;
        for my $line (@abbr){
              $check = "yes" if ($line eq $hell);
#              print $hell.$line.$check."\n";
        };
           push(@abbr, $hell) if ($check =~ "no");
        $check = "no";
      };
   };
};


@sorted_abbr = sort @abbr;
$length = scalar(@sorted_abbr);
foreach (@sorted_abbr) {
   chomp;
};
}; #subparse ends


sub lgr{
$length = scalar(@sorted_abbr);
if ( $option =~ /yes/ ) {
   for (my $i = 0; $i < $length;$i++) {
      foreach (@lgr) {
         my @dummy = split(/=/, $_);
         if ( $dummy[0] eq $sorted_abbr[$i] ) {
            $sorted_abbr[$i] = "\\item[$sorted_abbr[$i]] '$dummy[1]'";
         };
      };
   };
};
}; #lgr ends


sub prin{
foreach (@sorted_abbr) {
   if ( $_ !~ /^\\/ ) {
      $_ = "\\item[$_] ''";
   };
};
$filename =~ s/\.tex*//;
my $newfilename = $filename."-abb.txt";
open ABB, ">:utf8", $newfilename;
foreach (@sorted_abbr) {
    print ABB $_."\n";
};
close ABB;
}; #prin ends

sub help{
print "Instructions: 1. Enter the name of the TeX file you want to run
+ the script on.\n2. If you want to find the abbreviations already in 
+the list of 'Leipzig Glossing Rules' automatically, type '-lgr' after
+ the file name.\nExample: 'foo.tex -lgr'.\n3. The list of abbreviatio
+ns will be printed in a file '*-abb.txt'. If the name of the TeX file
+ was 'foo.tex' than the name of the list-file will be 'foo-abb-txt'.\
+n\nNote: The script and the TeX file have to be in the same directory
+.\n\nNote 2: Abbreviation is defined as:\n1. Anything which is writte
+n in uppercase and has more than one letter\n2. Anything written in \
+\textsc{}\n3. Only exceptions to the first two rules are the person/n
+umber abbreviations '1s, 1d, 2s, etc.'\n4. Only the line \\gll, \\ag.
+, \\bg. etc. \\exg. and \\exg are searched for abbreviations\n\nQuest
+ions: email to ozangulle\@gmail.com .\n";
first();
}; #help ends