For original, look at stackoverflow, or davido's excellent reproduction below.
#!usr/bin/perl
use strict;
use warnings;
my @parsed = (
'[sent. 1 len. 27]: [Others, ,, the, sea, butterflies, and, heteropods
+, ,, have, a, modified, foot, that, functions, as, a, swimming, organ
+, with, which, they, move, through, open, ocean, waters, .]
(ROOT(S(NP(NP (NNS Others))(, ,)(NP (DT the) (NN sea) (NNS butterflies
+)(CC and)(NNS heteropods))(, ,))(VP (VBP have)(NP(NP (DT a) (VBN modi
+fied) (NN foot))(SBAR(WHNP (WDT that))(S(VP (VBZ functions)(PP (IN as
+)(NP(NP (DT a) (VBG swimming) (NN organ))(SBAR(WHPP (IN with)(WHNP (W
+DT which)))(S(NP (PRP they))(VP (VBP move)(PP (IN through)(NP (JJ ope
+n) (NN ocean) (NNS waters))))))))))))) (. .)))
nsubj(have-9, Others-1)
conj_and(butterflies-5, heteropods-7)
dobj(have-9, foot-12)
nsubj(functions-14, foot-12)
nsubj(move-22, they-21)
prep_through(move-22, waters-26)' ,
'[sent. 2 len. 10]: [Radially, symmetrical, animals, move, slowly, or,
+ not, at, all, .]
(ROOT(S(NP(ADJP (RB Radially) (JJ symmetrical))(NNS animals))(VP (VBP
+move)(ADVP (RB slowly)(CC or)(RB not))(ADVP (IN at) (DT all))) (.
+.)))
nsubj(move-4, animals-3)
advmod(move-4, at-8)
pobj(at-8, all-9)' ,
'Parsing [sent. 155 len. 31]: [Flatworms, -LRB-, phylum, Platyhelminth
+es, -RRB-, have, no, body, cavity, ,, lack, organs, for, oxygen, tran
+sport, ,, have, only, one, entrance, to, the, gut, ,, and, move, by,
+beating, their, cilia, .]
(ROOT(S(NP(NP (NNS Flatworms))(PRN (-LRB- -LRB-)(NP (NNP phylum) (NNP
+Platyhelminthes))(-RRB- -RRB-)))(VP (VBP have)(S(NP(NP (DT no) (NN bo
+dy) (NN cavity))(, ,)(NP(NP (NN lack) (NNS organs))(PP (IN for)(NP (N
+N oxygen) (NN transport))))(, ,))(VP(VP (VB have)(NP (RB only) (CD on
+e) (NN entrance))(PP (TO to)(NP (DT the) (NN gut))))(, ,)(CC and)(VP
+(VB move)(PP (IN by)(S(VP (VBG beating)(NP (PRP$ their) (NN cilia))))
+))))) (. .)))
nsubj(have-6, Flatworms-1)
nsubj(move-26, cavity-9)' ,
'Parsing [sent. 27 len. 20]: [Because, fluids, are, relatively, incomp
+ressible, ,, they, move, to, another, part, of, the, cavity, when, mu
+scles, surrounding, them, contract, .]
(ROOT(S(SBAR (IN Because)(S(NP (NNS fluids))(VP (VBP are)(ADJP (RB rel
+atively) (JJ incompressible)))))(, ,)(NP (PRP they))(VP (VBP move)(PP
+ (TO to)(NP(NP (DT another) (NN part))(PP (IN of)(NP (DT the) (NN cav
+ity)))(SBAR(WHADVP (WRB when))(S(NP (NNS muscles))(VP (JJ surrounding
+)(NP (PRP them))(NP (NN contract)))))))) (. .)))
advcl(move-8, incompressible-5)
nsubj(move-8, they-7)' ,
'Parsing [sent. 18 len. 27]: [Cnidarians, also, have, epithelial, cell
+s, with, muscle, fibers, whose, contractions, enable, the, animals, t
+o, move, ,, as, well, as, nerve, nets, that, integrate, their, body,
+activities, .]
(ROOT(S(NP (NNS Cnidarians))(ADVP (RB also))(VP (VBP have)(NP(NP(NP (J
+J epithelial) (NNS cells))(PP (IN with)(NP (NN muscle) (NNS fibers)))
+(SBAR(WHNP (WP$ whose) (NNS contractions))(S(VP (VBP enable)(S(NP (DT
+ the) (NNS animals))(VP (TO to)(VP (VB move))))))))(, ,)(CONJP (RB as
+) (RB well) (IN as))(NP(NP (NN nerve) (NNS nets))(SBAR(WHNP (WDT that
+))(S(VP (VB integrate)(NP (PRP$ their) (NN body) (NNS activities)))))
+))) (. .)))
advmod(have-3, also-2)
nsubj(move-15, animals-13)'
);
# --- If qq is the same as reading in a file. :
# local $/ = 'Parsing';
# open(my $parse_corpus, '<', "/Users/jon/Desktop/stanford-postagger-f
+ull-2011-04-20/parsedLife2.txt") or die "Couldn't open directory $!";
#
# Note: split at Parsed, which is before each [sent. ...]
my @stopListNoun = ("theirs", "they");
# --- Unsure if same as real version: -- #
# open my $stop_list_noun, '<', $stopListNounFile or die "could not op
+en 'stoplist_noun.txt' $!"; ##Just open, no writing or reading?
# my @stopListNoun = <$stop_list_noun>;
# chomp @stopListNoun;
# close $stop_list_noun or die "could not close 'stoplist_noun.txt' $!
+";
#
# the file has a word each line.
</readmore>
my $search_key = "move";
my (@all_matches, @all_pronoun_matches);
my ($chapternumber, $sentencenumber, $sentence,
$grammar_relation, $argument1, $argument2);
foreach my $sentblock (@parsed)
{
chomp $sentblock;
next unless ($sentblock =~ /\[sent. (\d+) len. \d+\]: \[(.+)\]/);
$sentencenumber = $1;
$sentence = $2;
$sentence =~ s/, / /g;
$chapternumber = "1_1"; #From regex
next unless ($sentblock =~ /\b$search_key\b/i); ##Ensure the sente
+nce contains the searchkey
next unless ($sentblock =~ /\(VB\w*\s+\b$search_key\b[\)\s]+/i); #
+#Ensure searchkey is a verb
my ($arg1, $arg2, $goodmatch);
my @lines = split ("\n",$sentblock); ##Split by a newline
for my $l (0..$#lines)
{
if (($lines[$l] =~ /subj\w*\(/) && ($lines[$l] =~ /\b$
+search_key\b/i))
{
next unless ($lines[$l] =~ /\w+\(\w+\-\d+\,\s(\w+)\-\d
++\)/);
my ($matches, $pronoun_matches) = &dependency_checks($
+lines[$l], $search_key, $chapternumber, $sentencenumber, $sentence);
push @all_matches, $matches if ($matches);
push @all_pronoun_matches, $pronoun_matches if ($prono
+un_matches);
}
}
}
my %counts;
foreach my $rowref (@all_matches)
{
$counts{lc($rowref->[5])}++;
}
my %pronouncounts;
foreach my $pronounrowref (@all_pronoun_matches)
{
$pronouncounts{lc($pronounrowref->[5])}++;
}
@all_matches = sort { $counts{lc($b->[5])} <=> $counts{lc($a->[5])} ||
lc($a->[5]) cmp lc($b->[5])
} @all_matches;
# for pronoun_matches, same sort, then concatenate to all_matches
@all_pronoun_matches = sort { $pronouncounts{lc($b->[5])} <=> $pronoun
+counts{lc($a->[5])} ||
lc($a->[5]) cmp lc($b->[5])
} @all_pronoun_matches;
@all_matches = (@all_matches, @all_pronoun_matches);
my %seen_header;
my %seen_subheader;
foreach my $match (@all_matches)
{
$match->[3] = "Subject";
my $header = $counts{lc $match->[5]}." match(es) in which the ".$m
+atch->[3]." of ".$match->[4]." is ".$match->[5]." :\n\n";
print $header unless $seen_subheader{lc $match->[5]}++;
print "Section ".$match->[0].": ".$match->[2]."\n\n"; ##Section an
+d sentence (formatted)
} #Foreach match
sub dependency_checks
{
my ($line, $verbform, $chapternumber, $sentencenumber, $sentence)
+= @_;
my @matches;
my @pronoun_matches;
return unless ($line =~ /(\w+)\((\w+)\-\d+\,\s(\w+)\-\d+\)/); #Cou
+ld pass this in
$grammar_relation = $1;
$argument1 = $2;
$argument2 = $3;
foreach my $pronoun (@stopListNoun)
{
if ((lc $pronoun eq lc $argument1) || (lc $pronoun eq lc $argu
+ment2))
{
push (@pronoun_matches, $chapternumber, $sentencenumber, $
+sentence, $grammar_relation, $argument2, $argument1) if ($argument2 =
+~ /$verbform/i);
push (@pronoun_matches, $chapternumber, $sentencenumber, $
+sentence, $grammar_relation, $argument1, $argument2) if ($argument1 =
+~ /$verbform/i);
return;
}
}
#Make sure searchkey is 1st arg:
push (@matches, $chapternumber, $sentencenumber, $sentence, $gramm
+ar_relation, $argument2, $argument1) if ($argument2 =~ /$verbform/i);
push (@matches, $chapternumber, $sentencenumber, $sentence, $gramm
+ar_relation, $argument1, $argument2) if ($argument1 =~ /$verbform/i);
return (\@matches, \@pronoun_matches);
}