NB. -----------------------------------------------------------
NB. --- This file is "llil4.ijs"
NB. --- Run as e.g.:
NB.
NB. jconsole.exe llil4.ijs big1.txt big2.txt big3.txt out.txt
NB.
NB. --- (NOTE: last arg is output filename, file is overwritten)
NB. -----------------------------------------------------------
NB. Column layout of one input record, one entry per column:
NB. 0 = literal column, 1 = numeric column (used by read_file to pick a reader).
pattern =: 0 1
NB. NOTE(review): the NB. * tag below and on later lines presumably marks the
NB. sentences that change in a multithreaded variant of this script - confirm.
NB. ========> This line has a star in its right margin =======> NB. *
NB. Command-line words after the interpreter name and the script name.
args =: 2 }. ARGV
NB. At least one input file plus the output file are required; without this
NB. check a short command line only fails later with an unrelated error.
{{
if. 2 > # y do.
  echo 'Usage: jconsole llil4.ijs input1.txt [input2.txt ...] output.txt'
  exit 1
end.
}} args
NB. Last argument: output file name (boxed); the file is overwritten.
fn_out =: {: args
NB. All remaining arguments: input file names (list of boxes).
fn_in =: }: args
NB. PAD_CHAR =: ' '
NB. filter_CR y : y with every carriage return removed.
filter_CR =: -.&CR
NB. make_more_space y : y with each LF and each TAB overwritten by a space,
NB. so that a space is the only separator left in the text.
make_more_space =: {{ ' ' (I. y e. LF , TAB)} y }}
NB. find_spaces y : indices of the space characters in y.
find_spaces =: [: I. ' '&=
read_file =: {{
NB. read_file fname ; pattern
NB. Parses one separator-delimited text file into columns.
NB.   fname   - name of the file to read
NB.   pattern - one entry per column of a record; each entry indexes the
NB.             reader gerund below: 0 = literal reader, 1 = numeric reader
NB. Result: a list with one box per column. A literal column is boxed as a
NB. character table (rows padded with spaces), a numeric column as the
NB. numbers produced by 0&". on the column text.
NB. Signals an error when fread reports failure (_1).
'fname pattern' =. y
text =. fread fname
NB. fread yields _1 for an unreadable file; stop here with a clear message
NB. rather than letting the parser below run on the error code.
('cannot read input file: ' , fname) assert -. text -: _1
NB. Drop CRs and turn LF and TAB into spaces so that a space ends every field
NB. (assumes the file ends with a line feed - TODO confirm).
text =. make_more_space filter_CR text
NB. One past each space, shifted right with a leading 0: the start of every
NB. field, followed by the position just past the final separator.
selectors =. (|.!.0 , {:) >: find_spaces text
NB. width = columns per record, height = number of whole records.
width =. # pattern
height =. width <. @ %~ # selectors
NB. Field starts followed by pairwise differences, i.e. the field lengths
NB. (each length still counts the trailing separator).
append_diffs =. }: , 2& (-~/\)
NB. Regroup into one start-over-length table per field, grouped by column
NB. (intended shape: width , height , 2 1), the left argument form of ;.0
shuffle_dims =. (1 0 3 & |:) @ ((2, height, width, 1) & $)
selectors =. append_diffs selectors
selectors =. shuffle_dims selectors
NB. Literal reader: cut every field of the column out of text, drop the last
NB. character of each padded row, box the resulting table.
literal =. < @: (}:"1) @: (];. 0) & text "_1
NB. Numeric reader: box each field, raze the boxes into one string, convert
NB. with 0&". (0 stands in for anything that is not a number), box the list.
numeric =. < @: (0&".) @: (; @: (<;. 0)) & text "_1
NB. The three names below only exist to make the last sentence read aloud.
extract =. pattern & {
using =. 1 & \
or_maybe =. `
NB. Pick one reader per column from the gerund literal`numeric via pattern.
NB. NOTE(review): using is the modifier train 1 & \ ; presumably it applies
NB. the selected readers in turn to successive 1-item infixes (the columns)
NB. of selectors - confirm against the J version in use.
,(extract literal or_maybe numeric) using selectors
}}
read_many_files =: {{
NB. read_many_files fnames ; pattern
NB.   fnames  - list of boxed input file names
NB.   pattern - column pattern handed unchanged to read_file
NB. Result: one box per column, each holding that column from every file
NB. appended in the order of fnames.
'fnames pattern' =. y
ncols =. # pattern
NB. Parse each file on its own: one row of column boxes per file.
per_file =. (read_file @: (;&pattern))"0 fnames NB. *
NB. Flatten, then cut into non-overlapping groups of ncols boxes (one per file).
by_file =. (- ncols) ]\ , per_file
NB. Append matching columns across the files.
,&.>/"2 by_file
}}
NB. ---- Phase 1: read and parse all input files.
NB. words receives the literal column, nums the numeric column.
'words nums' =: read_many_files fn_in ; pattern
t1 =: (6!:1) '' NB. time since engine start
NB. ---- Phase 2: classify, sum, sort.
NB. Keep each distinct word once and add up the nums that share a word (key).
'words nums' =: (~. words) ; words +//. nums NB. *
NB. Reorder both columns by descending nums (the same grade applied inside each box).
'words nums' =: (\: nums)& { &.:>"_1 words ; nums
NB. Within every group of equal nums sort the words ascending, then raze the
NB. sorted groups back into one table.
words =: ; nums < @ /:~/. words
t2 =: (6!:1) '' NB. time since engine start
NB. ---- Phase 3: format and write.
NB. One row per word: word, TAB, formatted number, LF; raveled into one string.
text =: , words ,. TAB ,. (": ,. nums) ,. LF
NB. The two columns are no longer needed once the output text exists.
erase 'words' ; 'nums'
NB. Remove every space: the padding of the word table and of the number format.
text =: (#~ ~: & ' ') text
NB. NOTE(review): the result of fwrite is not checked here.
text fwrite fn_out
erase < 'text'
t3 =: (6!:1) '' NB. time since engine start
NB. ---- Report: t1, t2, t3 are cumulative, so phase times are differences.
echo 'Read and parse input: ' , ": t1
echo 'Classify, sum, sort: ' , ": t2 - t1
echo 'Format and write output: ' , ": t3 - t2
echo 'Total time: ' , ": t3
echo ''
echo 'Finished. Waiting for a key...'
NB. Reads standard input before leaving, so the console stays open until then.
stdin ''
exit 0
####
> jconsole.exe llil4.ijs big1.txt big2.txt big3.txt out.txt
Read and parse input: 1.501
Classify, sum, sort: 2.09
Format and write output: 1.318
Total time: 4.909
Finished. Waiting for a key...
Peak working set (memory): 376,456K
##
##
{{ for. i. 3 do. 0 T. 0 end. }} ''
,&.>/"2 (-#pattern) ]\ ,;(read_file @:(; &pattern)) t.'' "0 fnames
'words nums' =: (~.t.'' words) , words +//. t.'' nums
##
##
Read and parse input: 0.992
Classify, sum, sort: 1.849
Format and write output: 1.319
Total time: 4.16
##
##
> jconsole.exe llil4.ijs long1.txt long2.txt long3.txt out.txt
Read and parse input: 1.329
Classify, sum, sort: 0.149
Format and write output: 0.009
Total time: 1.487
##
##
$ ./llil2vec_11149482 big1.txt big2.txt big3.txt >vec.tmp
llil2vec start
get_properties CPU time : 3.41497 secs
emplace set sort CPU time : 1.04229 secs
write stdout CPU time : 1.31578 secs
total CPU time : 5.77311 secs
total wall clock time : 5 secs
$ ./llil2vec_11149482 long1.txt long2.txt long3.txt >vec.tmp
llil2vec start
get_properties CPU time : 1.14889 secs
emplace set sort CPU time : 0.057158 secs
write stdout CPU time : 0.003307 secs
total CPU time : 1.20943 secs
total wall clock time : 2 secs
$ ./llil2vec_11149482 big1.txt big2.txt big3.txt >vec.tmp
llil2vec (fixed string length=6) start
get_properties CPU time : 2.43187 secs
emplace set sort CPU time : 0.853877 secs
write stdout CPU time : 1.33636 secs
total CPU time : 4.62217 secs
total wall clock time : 5 secs