#!/usr/bin/perl
# Last Updated 05.09.09
#
# NOTE(review): the three fragments below preceded the shebang line in the
# text this file was recovered from.  They are kept verbatim as comments;
# they look like excerpts from an earlier revision -- confirm before removing.
#
#   if ( $file[$i] =~ m/(\\(gll|[abcdef(exg.)]g\.)|textsc\/)/ ) {
#   ... Note: The script and the TeX file have to be in the same directory. ...
#   N-=non- (e.g. NSG nonsingular, NPST nonpast)

=head1 NAME

name-of-script

=head1 SYNOPSIS

name-of-script [-l] filename.tex

=head1 DESCRIPTION

This script reads a given LaTeX file, finds everything in the text
that looks like an abbreviation, and then creates a new file in the
same directory (called "filename-abbrev.txt") that lists them all.

In this process, an abbreviation in LaTeX is defined as:

 - this...
 - that...
 - whatever else...

If your LaTeX file uses abbreviations that are specified in the
'Leipzig Glossing Rules' (LGR), you can use the '-l' option to have
these abbreviations listed with the full terms that they represent.
In this case, the output file will list the non-LGR abbreviations
first, and then the LGR ones are given with their meanings.

=cut

use strict;
use warnings;
use Getopt::Long;

# A line containing one of these macros (\gll, \ag. .. \fg., \exg, \exg.)
# marks the line that FOLLOWS it as the one to scan for abbreviations.
my $GLOSS_START_RE = qr/\\(?:gll|[abcdef]g\.|exg\.?)/;

# What counts as an abbreviation on a scanned line:
#   1. a run of capitals with one of - = . : or whitespace on each side;
#      the delimiters are lookarounds, so a delimiter shared by two
#      neighbours (as in "-DEF.ACC ") is not used up by the first match;
#   2. SG, DU or PL anywhere (this catches fused forms such as "3SG");
#   3. a single digit 1, 2 or 3 anywhere.
my $ABBR_RE = qr{
      (?<=[-=\s.:]) ([A-Z]+) (?=[-=\s.:])
    | (SG|DU|PL)
    | ([123])
}x;

# Read "ABBR=full term" records from the filehandle $fh and return a
# hash reference mapping each abbreviation to its term.  Lines without
# an '=' are skipped.  Only the first '=' separates key from value.
sub load_lgr_table {
    my ($fh) = @_;
    my %term_for;
    while ( my $entry = <$fh> ) {
        chomp $entry;
        next unless $entry =~ /=/;
        my ( $abbr, $term ) = split /=/, $entry, 2;
        $term_for{$abbr} = $term;
    }
    return \%term_for;
}

# Return every abbreviation found in $line, in order of appearance and
# with repeats.  Returns the empty list for an undefined line.
sub extract_abbreviations {
    my ($line) = @_;
    return unless defined $line;

    # In list context m//g yields all three capture groups for every
    # match; exactly one of them is defined each time, so dropping the
    # undefined ones leaves one abbreviation per match.
    return grep { defined } ( $line =~ /$ABBR_RE/g );
}

# Given a reference to the (chomped) lines of a TeX file, return a hash
# reference mapping each abbreviation to the number of times it occurs.
# Only the line directly after a gloss-introducing line is scanned.
sub collect_abbreviations {
    my ($lines) = @_;
    my %seen;

    # Stop one short of the end: the scanned line is always $ln + 1.
    for my $ln ( 0 .. $#{$lines} - 1 ) {
        next unless $lines->[$ln] =~ $GLOSS_START_RE;
        $seen{$_}++ for extract_abbreviations( $lines->[ $ln + 1 ] );
    }
    return \%seen;
}

# Derive the report file name: drop ".tex" (and anything after it) from
# the final path component, then append "-abbrev.txt".  A ".tex" inside
# a directory name is left alone.
sub output_name {
    my ($filename) = @_;
    ( my $stem = $filename ) =~ s{\.tex[^/]*\z}{};
    return $stem . '-abbrev.txt';
}

# Build the report as a list of lines (without trailing newlines).
#   $seen    - hash ref of abbreviations that were found
#   $lgr     - hash ref mapping LGR abbreviations to their full terms
#   $use_lgr - true when the '-l' option was given
# Without $use_lgr every abbreviation is listed, sorted.  With it, the
# non-LGR abbreviations come first, then an empty line, then one
# "\item[ABBR] 'term'" line per LGR abbreviation.
sub report_lines {
    my ( $seen, $lgr, $use_lgr ) = @_;
    my @sorted = sort keys %{$seen};

    return @sorted unless $use_lgr;

    my @plain = grep { !exists $lgr->{$_} } @sorted;
    my @items = map  { "\\item[$_] '$lgr->{$_}'" }
                grep { exists $lgr->{$_} } @sorted;
    return ( @plain, '', @items );
}

# Command-line entry point: parse options, read the TeX file, write the
# report next to it.  Dies with a usage message on bad arguments and
# with "$0: file: reason" when a file cannot be opened or written.
sub main {
    my $lgr = load_lgr_table( \*DATA );

    my $usage = "$0 [-l] filename.tex\n (run 'perldoc $0' for help)\n";

    my $opt_lgr;
    my $opt_ok = GetOptions( 'l' => \$opt_lgr );
    my $arg_ok = ( @ARGV == 1 and -f $ARGV[0] );
    die $usage unless ( $opt_ok and $arg_ok );

    my $filename = shift @ARGV;
    open( my $tex, '<:utf8', $filename ) or die "$0: $filename: $!\n";
    my @texlines = <$tex>;
    close $tex;
    chomp @texlines;

    my $abbr_seen = collect_abbreviations( \@texlines );

    my $outname = output_name($filename);
    open( my $out, '>:utf8', $outname ) or die "$0: $outname: $!\n";
    print {$out} "$_\n" for report_lines( $abbr_seen, $lgr, $opt_lgr );

    # Buffered write errors only surface at close, so check it.
    close $out or die "$0: $outname: $!\n";
    return;
}

# Run only when executed as a program, so the subs above can be loaded
# (e.g. by a test) without touching @ARGV or the file system.
main() unless caller;

1;

__DATA__
1=first person
2=second person
3=third person
A=agent-like argument of canonical transitive verb
ABL=ablative
ABS=absolutive
ACC=accusative
ADJ=adjective
ADV=adverb(ial)
AGR=agreement
ALL=allative
ANTIP=antipassive
APPL=applicative
ART=article
AUX=auxiliary
BEN=benefactive
CAUS=causative
CLF=classifier
COM=comitative
COMP=complementizer
COMPL=completive
COND=conditional
COP=copula
CVB=converb
DAT=dative
DECL=declarative
DEF=definite
DEM=demonstrative
DET=determiner
DIST=distal
DISTR=distributive
DU=dual
DUR=durative
ERG=ergative
EXCL=exclusive
F=feminine
FOC=focus
FUT=future
GEN=genitive
IMP=imperative
INCL=inclusive
IND=indicative
INDF=indefinite
INF=infinitive
INS=instrumental
INTR=intransitive
IPFV=imperfective
IRR=irrealis
LOC=locative
M=masculine
N=neuter
N-=non- (e.g. NSG nonsingular, NPST nonpast)
NEG=negation, negative
NMLZ=nominalizer/nominalization
NOM=nominative
OBJ=object
OBL=oblique
P=patient-like argument of canonical transitive verb
PASS=passive
PFV=perfective
PL=plural
POSS=possessive
PRED=predicative
PRF=perfect
PRS=present
PROG=progressive
PROH=prohibitive
PROX=proximal/proximate
PST=past
PTCP=participle
PURP=purposive
Q=question particle/marker
QUOT=quotative
RECP=reciprocal
REFL=reflexive
REL=relative
RES=resultative
S=single argument of canonical intransitive verb
SBJ=subject
SBJV=subjunctive
SG=singular
TOP=topic
TR=transitive
VOC=vocative