So far I haven't found any statistically significant speed-ups from employing this library ...
After a closer look, it was the -std=c++20 language mode that made the vector version faster, by 0.2 ~ 0.4 seconds versus -std=c++11.
Update 1: Support variable length words.
Update 2: Enable parallel sort. See Using Parallel Mode.
I tried OpenMP. Unfortunately, strtok is not thread-safe; e.g. strtok(NULL, "\n") caused a segfault. So I factored out strtok. The OpenMP result improved by only 0.1 seconds. That's because the actual reading is already fast. It takes a minimum of 2 threads to run faster than the non-OpenMP build, due to the extra step of populating vec_ret from the thread-local copies.
Building:
clang++ -o llil4vec -std=c++11 -Wall -O3 llil4vec.cpp
clang++ -o llil4vec -std=c++20 -Wall -O3 llil4vec.cpp # faster
# enable parallel via -fopenmp
clang++ -o llil4vec-omp -std=c++11 -fopenmp -Wall -O3 llil4vec.cpp
clang++ -o llil4vec-omp -std=c++20 -fopenmp -Wall -O3 llil4vec.cpp # faster
Running - Real time results:
$ time ./llil4vec big1.txt big2.txt big3.txt >out.txt
std:c++11: 2.901 secs
std:c++20: 2.850 secs
$ time OMP_NUM_THREADS=3 ./llil4vec-omp big1.txt big2.txt big3.txt >out.txt
std:c++11: 2.308 secs
std:c++20: 2.267 secs
llil4vec.cpp modification, OpenMP-aware:
#if defined(_OPENMP)
#include <omp.h>
#include <parallel/algorithm>
#else
#include <algorithm>
#endif
...
static void get_properties(
int nfiles, // in: the number of input files
char* fname[], // in: the input file names
vec_int_str_type& vec_ret) // out: a vector of properties
{
#if defined(_OPENMP)
omp_set_dynamic(0);
omp_set_max_active_levels(1);
#pragma omp parallel
{
vec_int_str_type vec_loc; // thread local copy
#pragma omp for nowait schedule(static,1)
#endif
for (int i = 0; i < nfiles; ++i) {
char line[MAX_LINE_LEN_L+1];
char* found;
llil_int_type count;
FILE* fh = ::fopen(fname[i], "r");
if (fh == NULL) {
std::cerr << "Error opening '" << fname[i] << "' : errno="
+ << errno << "\n";
continue;
}
while ( ::fgets(line, MAX_LINE_LEN_L, fh) != NULL ) {
found = ::strchr(line, '\t');
count = ::atoll( &line[found - line + 1] );
line[found - line] = '\0'; // word
#ifdef MAX_STR_LEN_L
str_type fixword { { '\0', '\0', '\0', '\0', '\0', '\0' }
+};
::memcpy( fixword.data(), line, found - line );
#if defined(_OPENMP)
vec_loc.emplace_back( -count, fixword );
#else
vec_ret.emplace_back( -count, fixword );
#endif
#else
#if defined(_OPENMP)
vec_loc.emplace_back( -count, line );
#else
vec_ret.emplace_back( -count, line );
#endif
#endif
}
::fclose(fh);
}
#if defined(_OPENMP)
#pragma omp critical
vec_ret.insert(vec_ret.end(), vec_loc.begin(), vec_loc.end());
}
#endif
// Needs to be sorted by word for later sum of adjacent count field
+s to work
#if defined(_OPENMP)
__gnu_parallel::sort( vec_ret.begin(), vec_ret.end(),
[](const int_str_type& left, const int_str_type& right) { return
+ left.second < right.second; }
);
#else
std::sort( vec_ret.begin(), vec_ret.end(),
[](const int_str_type& left, const int_str_type& right) { return
+ left.second < right.second; }
);
#endif
}
|