Part three :) I tried creating a single SQLite DB file per each list file. The query script then reads all DBs by attaching other DBs to the first DB. Something like this.
ATTACH DATABASE 'hash2.db' AS ha_2;
ATTACH DATABASE 'hash3.db' AS ha_3;
SELECT name, SUM(value) FROM (
SELECT * FROM main.kv_store UNION ALL
SELECT * FROM ha_2.kv_store UNION ALL
SELECT * FROM ha_3.kv_store
) GROUP BY name ORDER BY SUM(value) DESC, name;
DETACH DATABASE ha_2;
DETACH DATABASE ha_3;
running:
Each import takes ~ 1.5 seconds on my machine. The query takes the same amount of time as before.
$ bash llil_sql_imp2 hash1.db big1.txt
$ bash llil_sql_imp2 hash2.db big3.txt
$ bash llil_sql_imp2 hash3.db big3.txt
$ time bash llil_sql_qryn hash1.db hash2.db hash3.db >out.txt
real 0m9.650s
user 0m9.534s
sys 0m0.106s
llil_sql_qryn:
#!/usr/bin/env bash
# https://perlmonks.org/?node_id=11150248
# SQLite llil query multiple DBs.
# Usage: bash llil_sql_qryn hash1.db [ hash2.db ... ] >out.txt
# SQL
function make_script_init () {
read -r -d '' var << 'EOF'
-- Set SQLite pragmas.
--
.output /dev/null
PRAGMA cache_size = 10000;
PRAGMA page_size = 4096;
PRAGMA journal_mode = OFF;
PRAGMA synchronous = OFF;
PRAGMA temp_store = MEMORY;
.output
-- Output records.
--
.mode csv
.headers off
.separator ROW "\n"
.separator "\t"
EOF
echo "$var"
}
function make_script_query_one_db () {
read -r -d '' var << 'EOF'
SELECT "name", SUM("value") FROM "kv_store"
GROUP BY "name"
ORDER BY SUM("value") DESC, "name";
EOF
echo "$var"
}
function make_script_query_many_dbs () {
##
# ATTACH DATABASE 'hash2.db' AS ha_2;
# ATTACH DATABASE 'hash3.db' AS ha_3;
# ATTACH DATABASE 'hashN.db' AS ha_N;
#
# SELECT name, SUM(value) FROM (
# SELECT * FROM main.kv_store UNION ALL
# SELECT * FROM ha_2.kv_store UNION ALL
# SELECT * FROM ha_3.kv_store UNION ALL
# SELECT * FROM ha_N.kv_store
# ) GROUP BY name ORDER BY SUM(value) DESC, name;
#
# detach DATABASE ha_2;
# detach DATABASE ha_3;
# detach DATABASE ha_N;
##
n=1 var=""
for dbfile in "$@"; do
n=$((n + 1))
var+="ATTACH DATABASE '${dbfile}' AS ha_${n};${NL}"
done
var+="${NL}"
var+="SELECT name, SUM(value) FROM (${NL}"
var+=" SELECT * FROM main.kv_store UNION ALL${NL}"
n=1
for dbfile in "$@"; do
n=$((n + 1))
if [[ "$n" -le "$#" ]]; then
var+=" SELECT * FROM ha_${n}.kv_store UNION ALL${NL}"
else
var+=" SELECT * FROM ha_${n}.kv_store${NL}"
fi
done
var+=") GROUP BY name ORDER BY SUM(value) DESC, name;${NL}"
var+="${NL}"
n=1
for dbfile in "$@"; do
n=$((n + 1))
var+="DETACH DATABASE ha_${n};${NL}"
done
echo "$var"
}
# RUN
DB="$1"; shift
if [[ ! "$DB" =~ .db$ ]]; then
echo "usage: bash $0 hash.db [ hash2.db ... ]" >&2
exit 1
elif [[ ! -r "$DB" ]]; then
echo "$DB: cannot open '$DB' (No such file)" >&2
exit 1
fi
SQLCMND=$( command -v sqlite3 )
if [[ -z "$SQLCMND" ]]; then
echo "sqlite3: command not found" >&2
exit 1
fi
NL=$'\n'
SQLSCRIPT="$( make_script_init )"
SQLSCRIPT+="${NL}${NL}"
if [[ "$#" -eq 0 ]]; then
SQLSCRIPT+="$( make_script_query_one_db )"
elif [[ "$#" -gt 64 ]]; then
echo "$DB: cannot attach more than 64 DBs" >&2
exit 1
else
for dbfile in "$@"; do
if [[ ! -r "$dbfile" ]]; then
echo "$dbfile: cannot open '$dbfile', exiting..." >&2
exit 1
fi
done
SQLSCRIPT+="$( make_script_query_many_dbs "$@" )"
fi
exec "$SQLCMND" "$DB" < <(echo "$SQLSCRIPT")