NB. -----------------------------------------------------------
NB. --- This file is "llil4.ijs"
NB. --- Run as e.g.:
NB.
NB.   jconsole.exe llil4.ijs big1.txt big2.txt big3.txt out.txt
NB.
NB. --- (NOTE: last arg is output filename, file is overwritten)
NB. -----------------------------------------------------------
NB.
NB. Overview: reads every input file, splits it into fields on TAB / LF,
NB. sums the numeric field per distinct text field, orders the result by
NB. descending sum (ties ordered by ascending text) and writes one
NB. "text TAB sum LF" record per distinct text field to the output file.
NB. Three timings are echoed at the end.

NB. One entry per field of an input record.  Each entry indexes the gerund
NB. literal`numeric built inside read_file: 0 = keep the field as text,
NB. 1 = parse the field as a number.
pattern =: 0 1

NB. ========> This line has a star in its right margin =======>        NB. *
NB. (The starred lines are the ones that have alternative forms in the
NB.  notes at the end of this file.)

NB. Command line: drop the first two ARGV entries (presumably the
NB. interpreter and this script - see the run example above); the last
NB. remaining argument is the output file, all others are input files.
args   =: 2 }. ARGV

NB. Need at least one input file and the output file; without this guard
NB. the script would go on with an empty input list / output name.
{{
  if. 2 > # args do.
    echo 'Usage: jconsole llil4.ijs in1.txt [in2.txt ...] out.txt'
    exit 1
  end.
  i. 0 0
}} ''

fn_out =: {: args
fn_in  =: }: args

NB. PAD_CHAR =: ' '

NB. filter_CR y       : y with every CR character removed.
filter_CR       =: #~ ~: & CR

NB. make_more_space y : y with every LF and every TAB replaced by a blank.
make_more_space =: ' ' I. @ ((LF = ]) +. (TAB = ])) } ]

NB. find_spaces y     : indices of the blanks in y.
find_spaces     =: I. @: = & ' '

NB. read_file fname ; pattern
NB.   fname   - name of the file to read (boxed or open, as accepted by fread)
NB.   pattern - one 0/1 entry per field of a record (see the global above)
NB. Returns a list of (# pattern) boxes, one per field position:
NB.   for a 0 entry a character matrix with one row per record,
NB.   for a 1 entry a numeric list with one number per record.
NB. Exits with status 1 if the file cannot be read.
read_file =: {{
  'fname pattern' =. y

  NB. fread yields _1 on failure; stop here instead of feeding that
  NB. into the parsing pipeline below.
  raw =. fread fname
  if. raw -: _1 do.
    echo 'Cannot read input file: ' , > fname
    exit 1
  end.

  NB. After this every field is followed by exactly one blank
  NB. (assumes each field, including the last, is terminated by TAB or LF
  NB.  - TODO confirm for files without a final line terminator).
  text         =. make_more_space filter_CR raw

  NB. 0 followed by "index after each blank": the start offset of every
  NB. field plus one final end offset.
  selectors    =. (|.!.0 , {:) >: find_spaces text

  width        =. # pattern                        NB. fields per record
  height       =. width <. @ %~ # selectors        NB. floor(#selectors % width) = records

  NB. starts (all but the last offset) followed by the pairwise
  NB. differences, i.e. the length of each field including its blank.
  append_diffs =. }: , 2& (-~/\)

  NB. Reshape to 2 x height x width x 1, then move axes 1 0 3 to the end:
  NB. result is width x height x 2 x 1, i.e. for every field position and
  NB. every record a 2 x 1 (start ,: length) table as expected by ;.0
  shuffle_dims =. (1 0 3 & |:) @ ((2, height, width, 1) & $)
  selectors    =. append_diffs selectors
  selectors    =. shuffle_dims selectors

  NB. literal : cut every selected span out of text (rows of a character
  NB.           matrix), drop the last column (each span carries its
  NB.           trailing blank) and box the matrix.
  literal      =. < @: (}:"1) @: (];. 0) & text "_1

  NB. numeric : box every selected span, raze the boxes into one string,
  NB.           convert with 0&". (0 for unparsable items) and box the list.
  numeric      =. < @: (0&".) @: (; @: (<;. 0)) & text "_1

  NB. The next three names only exist so that the final sentence reads
  NB. like prose:
  NB.   extract  - picks gerund entries according to pattern
  NB.   or_maybe - the tie conjunction ` (builds the gerund literal`numeric)
  NB.   using    - intended reading: 1 (gerund)\ selectors, i.e. the chosen
  NB.              verbs are applied in turn to successive 1-item infixes
  NB.              (one field position each) of selectors
  extract      =. pattern & {
  using        =. 1 & \
  or_maybe     =. `

  ,(extract literal or_maybe numeric) using selectors
}}

NB. read_many_files fnames ; pattern
NB.   fnames  - list of boxed input file names
NB.   pattern - as for read_file
NB. Reads every file with read_file and appends the per-file results
NB. field position by field position; returns (# pattern) boxes.
read_many_files =: {{
  'fnames pattern' =. y
  ,&.>/"2 (-#pattern) ]\ ,(read_file @:(; &pattern)) "0 fnames      NB. *
}}

'words nums' =: read_many_files fn_in ; pattern

t1 =: (6!:1) '' NB. time since engine start

NB. Distinct words, and the sum of nums over the records of each word.
'words nums' =: (~. words) ; words +//. nums       NB. *
NB. Reorder both by descending nums.
'words nums' =: (\: nums)& { &.:>"_1 words ; nums
NB. Within each group of equal nums, sort the words ascending.
words        =: ; nums < @ /:~/. words

t2 =: (6!:1) '' NB. time since engine start

NB. One "word TAB num LF" row per record, ravelled to a single string.
text =: , words ,. TAB ,. (": ,. nums) ,. LF
erase 'words' ; 'nums'
NB. Delete every blank: this removes the padding of the word matrix and
NB. of the formatted number column.
text =: (#~ ~: & ' ') text
text fwrite fn_out
erase < 'text'

t3 =: (6!:1) '' NB. time since engine start

echo 'Read and parse input: ' , ": t1
echo 'Classify, sum, sort: ' , ": t2 - t1
echo 'Format and write output: ' , ": t3 - t2
echo 'Total time: ' , ": t3
echo ''
echo 'Finished. Waiting for a key...'

stdin ''
exit 0

NB. ===========================================================
NB. Notes that followed the script (not executed: they come after
NB. "exit 0" and are kept here as comments).
NB. ===========================================================
NB.
NB. ####
NB.
NB. > jconsole.exe llil4.ijs big1.txt big2.txt big3.txt out.txt
NB. Read and parse input: 1.501
NB. Classify, sum, sort: 2.09
NB. Format and write output: 1.318
NB. Total time: 4.909
NB.
NB. Finished. Waiting for a key...
NB.
NB. Peak working set (memory): 376,456K
NB.
NB. ####
NB.
NB. Variant lines using T. and t. (they correspond to the lines marked
NB. with a star above; presumably the first one is run once before the
NB. files are read - TODO confirm):
NB.
NB. {{ for. i. 3 do. 0 T. 0 end. }} ''
NB.
NB. ,&.>/"2 (-#pattern) ]\ ,;(read_file @:(; &pattern)) t.'' "0 fnames
NB.
NB. 'words nums' =: (~.t.'' words) , words +//. t.'' nums
NB.
NB. ####
NB.
NB. Read and parse input: 0.992
NB. Classify, sum, sort: 1.849
NB. Format and write output: 1.319
NB. Total time: 4.16
NB.
NB. ####
NB.
NB. > jconsole.exe llil4.ijs long1.txt long2.txt long3.txt out.txt
NB. Read and parse input: 1.329
NB. Classify, sum, sort: 0.149
NB. Format and write output: 0.009
NB. Total time: 1.487
NB.
NB. ####
NB.
NB. $ ./llil2vec_11149482 big1.txt big2.txt big3.txt >vec.tmp
NB. llil2vec start
NB. get_properties CPU time : 3.41497 secs
NB. emplace set sort CPU time : 1.04229 secs
NB. write stdout CPU time : 1.31578 secs
NB. total CPU time : 5.77311 secs
NB. total wall clock time : 5 secs
NB.
NB. $ ./llil2vec_11149482 long1.txt long2.txt long3.txt >vec.tmp
NB. llil2vec start
NB. get_properties CPU time : 1.14889 secs
NB. emplace set sort CPU time : 0.057158 secs
NB. write stdout CPU time : 0.003307 secs
NB. total CPU time : 1.20943 secs
NB. total wall clock time : 2 secs
NB.
NB. $ ./llil2vec_11149482 big1.txt big2.txt big3.txt >vec.tmp
NB. llil2vec (fixed string length=6) start
NB. get_properties CPU time : 2.43187 secs
NB. emplace set sort CPU time : 0.853877 secs
NB. write stdout CPU time : 1.33636 secs
NB. total CPU time : 4.62217 secs
NB. total wall clock time : 5 secs