#!/usr/bin/perl
#abbreviation script (ab)
#Last Updated 05.09.09
use strict;
use utf8;
my $option = "no";
my $length;
my @sorted_abbr;
my @lgr = ("1=first person",
"2=second person",
"3=third person",
"A=agent-like argument of canonical transitive verb",
"ABL=ablative",
"ABS=absolutive",
"ACC=accusative",
"ADJ=adjective",
"ADV=adverb(ial)",
"AGR=agreement",
"ALL=allative",
"ANTIP=antipassive",
"APPL=applicative",
"ART=article",
"AUX=auxiliary",
"BEN=benefactive",
"CAUS=causative",
"CLF=classifier",
"COM=comitative",
"COMP=complementizer",
"COMPL=completive",
"COND=conditional",
"COP=copula",
"CVB=converb",
"DAT=dative",
"DECL=declarative",
"DEF=definite",
"DEM=demonstrative",
"DET=determiner",
"DIST=distal",
"DISTR=distributive",
"DU=dual",
"DUR=durative",
"ERG=ergative",
"EXCL=exclusive",
"F=feminine",
"FOC=focus",
"FUT=future",
"GEN=genitive",
"IMP=imperative",
"INCL=inclusive",
"IND=indicative",
"INDF=indefinite",
"INF=infinitive",
"INS=instrumental",
"INTR=intransitive",
"IPFV=imperfective",
"IRR=irrealis",
"LOC=locative",
"M=masculine",
"N=neuter",
"N-=non- (e.g. NSG nonsingular, NPST nonpast)",
"NEG=negation, negative",
"NMLZ=nominalizer/nominalization",
"NOM=nominative",
"OBJ=object",
"OBL=oblique",
"P=patient-like argument of canonical transitive verb",
"PASS=passive",
"PFV=perfective",
"PL=plural",
"POSS=possessive",
"PRED=predicative",
"PRF=perfect",
"PRS=present",
"PROG=progressive",
"PROH=prohibitive",
"PROX=proximal/proximate",
"PST=past",
"PTCP=participle",
"PURP=purposive",
"Q=question particle/marker",
"QUOT=quotative",
"RECP=reciprocal",
"REFL=reflexive",
"REL=relative",
"RES=resultative",
"S=single argument of canonical intransitive verb",
"SBJ=subject",
"SBJV=subjunctive",
"SG=singular",
"TOP=topic",
"TR=transitive",
"VOC=vocative");
my $filename;
sub first{
print "Enter the name of your TeX file or type 'help' for help: ";
$filename = <>;
if ($filename =~ /help/) {
help();
}else{
parse();
};
}; #first ends
first();
#parse();
lgr();
prin();
sub parse{
#parsing tex
my $dummy = 0;
my @words;
my $check = "no";
my @spaces;
my @abbr;
chomp $filename;
if ( $filename =~ /\s/ ) {
my @dumb = split(/\s/, $filename);
$filename = $dumb[0];
if ($dumb[1] =~ /-lgr/ ) {
$option = "yes";
};
};
open FILE, "<:utf8", $filename or die "No such file";
my @file;
while (<FILE>) {
chomp;
push(@file, $_);
};
close FILE;
$length = scalar(@file);
for (my $i = 0; $i < $length; $i++) {
if ( $file[$i] =~ m/(\\(gll|[abcdef(exg.)]g\.)|textsc\/)/ ) {
$dummy = $i + 1;
while ( $file[$dummy] =~ m/[\-|\s]([A-Z]+)[\s|\-|\:|\.|\=]|[\=|\
+s]([A-Z]+)[\s|\-|\:|\.|\=]|[\.|\s]([A-Z]+)[\s|\-|\:|\.|\=]|[\:|\s]([A
+-Z]+)[\s|\-|\:|\.|\=]|(SG|DU|PL)|(1|2|3)|\s([A-Z]+)\s/g ) {
# print $&."\n";
my $hell = $&;
$hell =~ s/-//g;$hell =~ s/\s//g;$hell =~ s/://g;$hell =~ s/\.
+//g;$hell =~ s/=//g;
for my $line (@abbr){
$check = "yes" if ($line eq $hell);
# print $hell.$line.$check."\n";
};
push(@abbr, $hell) if ($check =~ "no");
$check = "no";
};
};
};
@sorted_abbr = sort @abbr;
$length = scalar(@sorted_abbr);
foreach (@sorted_abbr) {
chomp;
};
}; #subparse ends
sub lgr{
$length = scalar(@sorted_abbr);
if ( $option =~ /yes/ ) {
for (my $i = 0; $i < $length;$i++) {
foreach (@lgr) {
my @dummy = split(/=/, $_);
if ( $dummy[0] eq $sorted_abbr[$i] ) {
$sorted_abbr[$i] = "\\item[$sorted_abbr[$i]] '$dummy[1]'";
};
};
};
};
}; #lgr ends
sub prin{
foreach (@sorted_abbr) {
if ( $_ !~ /^\\/ ) {
$_ = "\\item[$_] ''";
};
};
$filename =~ s/\.tex*//;
my $newfilename = $filename."-abb.txt";
open ABB, ">:utf8", $newfilename;
foreach (@sorted_abbr) {
print ABB $_."\n";
};
close ABB;
}; #prin ends
sub help{
print "Instructions: 1. Enter the name of the TeX file you want to run
+ the script on.\n2. If you want to find the abbreviations already in
+the list of 'Leipzig Glossing Rules' automatically, type '-lgr' after
+ the file name.\nExample: 'foo.tex -lgr'.\n3. The list of abbreviatio
+ns will be printed in a file '*-abb.txt'. If the name of the TeX file
+ was 'foo.tex' than the name of the list-file will be 'foo-abb-txt'.\
+n\nNote: The script and the TeX file have to be in the same directory
+.\n\nNote 2: Abbreviation is defined as:\n1. Anything which is writte
+n in uppercase and has more than one letter\n2. Anything written in \
+\textsc{}\n3. Only exceptions to the first two rules are the person/n
+umber abbreviations '1s, 1d, 2s, etc.'\n4. Only the line \\gll, \\ag.
+, \\bg. etc. \\exg. and \\exg are searched for abbreviations\n\nQuest
+ions: email to ozangulle\@gmail.com .\n";
first();
}; #help ends
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.