############################################################################
#-- This PERL script creates the KJV Bible indexing for 7500 complete copies
#-- of the Bible contained within a single 114-GIG Flat File with 528 byte
#-- fixed-length records.
############################################################################
#--
#-- INPUT:
#--
#--   66 Books, 1189 Chapters, 31102 Verses
#--
#--   DUMP 1 (edited) format:   book number | book name | abbreviation
#--       "1|Genesis|Ge"
#--       ...
#--       "66|Revelation|Re"
#--   ==================
#--   DUMP 2 (unedited) format: book number | chapter number | verses in chapter
#--       "1|1|31"
#--       ...
#--       "66|22|21"
#--   ==================
#--   DUMP 3 (unedited) format: book number | chapters in book
#--       "1|50"
#--       ...
#--       "66|22"
#--
#-- I used the Abingdon's/Strong's names & abbreviations for Bible Books
#--
#-- OUTPUT (SDBM key => value):
#--
#--   "<book>"                  => "<name>,<abbrev>,<chapters in book>"
#--   "<copy>|<book>|<chapter>" => "<byte offset of first verse>,<verses in chapter>"
#--   "STATS"                   => "(1 to <copies>) <books>,<chapters>,<verses>"
#--
################################################################################
use strict;
use warnings;

use File::Basename;
use SDBM_File;
use Fcntl;

use constant COPIES     => 7500;    #-- number of Bible copies held in the flat file
use constant RECORD_LEN => 528;     #-- fixed length of one verse record, in bytes

#-- Return the non-blank lines of a dump file with the CR/LF terminator removed.
#-- Dies with the file name and OS error if the file cannot be opened.
sub read_dump {
    my ($path) = @_;

    open( my $fh, '<', $path ) or die "Error on open input [$path]: $!\n";
    my @lines;
    while ( my $line = <$fh> ) {
        $line =~ s/\r?\n\z//;       #-- remove CR/LF
        next unless length $line;   #-- ignore blank (e.g. trailing) lines
        push @lines, $line;
    }
    close($fh);

    return @lines;
}

print "Processing... Please wait..." . "\n";

my $cwd = dirname($0);

#-- No "keys %BibleIDX = N" pre-sizing: once the hash is tied every access goes
#-- to the SDBM file, so an in-memory bucket hint would only waste RAM.
my %BibleIDX;
tie( %BibleIDX, "SDBM_File", '.\KJV_Bible_SDBM_528_31102_7500', O_RDWR|O_CREAT, 0666 )
    or die "Error tying SDBM hash files: $!\n";

my $total_books = 0;
my $total_chps  = 0;
my $total_vers  = 0;

################################################################################
#-- DUMP 1: one entry per book, keyed by book number (unloaded from MS-Access)
for my $line ( read_dump("$cwd\\KJV_BIBLE_DUMP_1_edited.txt") ) {
    my ( $bk_nbr, $name, $abbrev ) = split( /\|/, $line );
    $BibleIDX{$bk_nbr} = $name . "," . $abbrev;    #-- 1=Genesis,Ge
    $total_books++;
}

################################################################################
#-- DUMP 2: one row per chapter (unloaded from MS-Access).
#-- Split each row once here instead of once per Bible copy in the loop below.
my @nbr_chapters =
    map { [ split( /\|/, $_ ) ] } read_dump("$cwd\\KJV_BIBLE_DUMP_2_unedited.txt");

################################################################################
#-- DUMP 3: append the chapter count to each book entry (unloaded from MS-Access)
for my $line ( read_dump("$cwd\\KJV_BIBLE_DUMP_3_unedited.txt") ) {
    my ( $bk_nbr, $nbr_chps ) = split( /\|/, $line );
    $BibleIDX{$bk_nbr} = $BibleIDX{$bk_nbr} . "," . $nbr_chps;    #-- 1=Genesis,Ge,50
    $total_chps += $nbr_chps;
}

################################################################################
#-- Walk every chapter of every copy in file order, recording where its first
#-- verse starts.  $offset is simply the running byte total written so far.
my $offset = 0;
for my $tran_nbr ( 1 .. COPIES ) {
    print "Indexing the chapters for Bible copy number $tran_nbr of " . COPIES . "\n";
    foreach my $chapter (@nbr_chapters) {
        my ( $bk_nbr, $chp_nbr, $nbr_verses ) = @{$chapter};
        my $key = $tran_nbr . "|" . $bk_nbr . "|" . $chp_nbr;
        $BibleIDX{$key} = $offset . "," . $nbr_verses;    #-- 1|1|1 = 0,31
        $offset     += RECORD_LEN * $nbr_verses;
        $total_vers += $nbr_verses;
    }
}

################################################################################
#-- Books and chapters were counted for a single copy; scale to all copies.
$total_books *= COPIES;
$total_chps  *= COPIES;

print "Process complete:" . "\n\n";
print $total_books . "\n";
print $total_chps . "\n";
print $total_vers . "\n";

$BibleIDX{"STATS"} = "(1 to " . COPIES . ") " . $total_books . "," . $total_chps . "," . $total_vers;
print $BibleIDX{"STATS"} . "\n";
exit;

################################################################################
END {
    untie(%BibleIDX);    #-- flush the .pag/.dir files, also when we die early
    sleep 5;             #-- keep the console window open long enough to read
}
####
#######################################################################################
#-- This PERL script creates a 114-GIG Flat File containing 233+ million Bible verses,
#-- for 7500 complete copies of the KJV Bible (31,102 verses/copy).
#--
#-- One copy of the Bible is used as input, and is of variable-length record format,
#-- with CR/LF (newline) record terminator. Output file will be formatted to 528 byte
#-- fixed-length records, with no record terminator.
#--
#-- Fixed-length records are used to demonstrate capacity requirements for a READ/WRITE
#-- database where the records are edited in-place at reliable byte offset locations.
#--
#-- A similar READ ONLY database with variable-length records would have the capacity
#-- to hold 4 times as many Bible verses i.e. almost 1 Billion records/verses.
#--
#-- This Flat File is being created for RANDOM ACCESS of its records, where the byte
#-- offset locations are stored persistently in a PERL SDBM database file of key/value
#-- pairs tied to an in-memory PERL program hash table at run-time.
#--
#-- Tested on Windows 7 Home Premium with NT File System (NTFS).
#-- Tested with ActiveState ActivePerl for Windows version/release 5.26.1
#-- Tested using unbuffered File I/O syntax: sysopen, sysseek, sysread, syswrite,
#-- and close.
#######################################################################################
use strict;
use warnings;

use File::Basename;
use Fcntl;

use constant COPIES     => 7500;    #-- number of Bible copies to write
use constant RECORD_LEN => 528;     #-- fixed length of one verse record, in bytes

my $cwd = dirname($0);              #-- this application program directory

print "Processing... Please wait..." . "\n";

my $total_verses = 0;
my @nbr_verses   = ();              #-- one space-padded RECORD_LEN-byte record per verse

open( my $in, '<', "$cwd\\BibleVerses1Copy.txt" )
    or do { print "Error on open input: $!\n"; sleep 5; die };
while ( my $verse = <$in> ) {
    $verse =~ s/\r?\n\z//;          #-- remove CR/LF

    #-- sprintf pads but never truncates: one over-long verse would shift every
    #-- following record off its 528-byte boundary and invalidate the index.
    if ( length($verse) > RECORD_LEN ) {
        die "Verse on input line " . $. . " is " . length($verse)
            . " bytes; it does not fit a " . RECORD_LEN . "-byte record\n";
    }

    #-- pad once here rather than once per copy in the write loop below
    push @nbr_verses, sprintf( "%-*s", RECORD_LEN, $verse );    #-- load 31,102 verses to an array in memory
}
close($in);

#-- O_TRUNC: start from an empty file so a larger file left by an earlier run
#-- cannot leave stale records behind the new data.
sysopen( my $out, "$cwd\\KJV_BIBLE_SDBM_528_31102_7500.dat", O_WRONLY|O_CREAT|O_TRUNC )
    or do { print "Error on open output: $!\n"; sleep 5; die };

sysseek( $out, 0, 0 )               #-- top of file ("0 but true" on success)
    or die "Error on seek output: $!\n";

for my $tran_nbr ( 1 .. COPIES ) {
    print "$tran_nbr of " . COPIES . "\n";
    foreach my $record (@nbr_verses) {
        my $written = syswrite( $out, $record );
        defined $written
            or die "Error on write output: $!\n";
        $written == RECORD_LEN
            or die "Short write on output: $written of " . RECORD_LEN . " bytes\n";
        $total_verses++;
    }
}

close($out) or die "Error on close output: $!\n";

print "Total verses written = $total_verses \n";
exit;

END {
    sleep 5;                        #-- keep the console window open long enough to read
}
####
#################################################################################################
# This PERL script prints out the verses from the last chapter of the KJV Bible
# i.e. Book of Revelation(book 66), chapter 22, verses 1-21.
# It does this by performing
# Random Access to the last chapter of the 7500th copy of the Bible in a 114-GIG Flat File using
# a SDBM file of key/value pairs containing the record byte offsets to the first verse within
# each of the 1189 chapters within each of the 7500 copies of the Bible.
#
# This program works with ActivePerl (5.26.1), but not ActivePerl (5.6.1)
# Any similar application built (using flat files > 4-GIG in size) must use (5.26.1) for
# Joint Database Technology (JDT) i.e.
# Flat Files with fixed-length records indexed by DBM Files of key/value pairs.
#
# Continue to use ActivePerl (5.6.1) for ODBC/MS-Jet 4.x (MS-Access Driver) SQL databases as it
# does not work on ActivePerl (5.26.1), at least not on Windows 7 Home Premium O/S.
#
# TESTING... Windows 7 Home Premium (NT File System - NTFS)...
#
#################################################################################################
use strict;
use warnings;

use File::Basename;
use IO::Handle;
use SDBM_File;
use Fcntl qw(:DEFAULT :seek);

use constant RECORD_LEN => 528;     #-- fixed length of one verse record, in bytes

my $cwd = dirname($0);

#-- which chapter to report: Bible copy, book number, chapter number
my $tr  = 7500;
my $bk  = 66;
my $chp = 22;
my $key = $tr . "|" . $bk . "|" . $chp;

my $outfile = "$cwd\\KJVBibleSDBM_528_31102_7500_Report_" . $tr . "_" . $bk . "_" . $chp . ".txt";
open( my $out, '>', $outfile ) or die "Error on open output [$outfile]: $!\n";
$out->autoflush(1);

my $datfile = "$cwd\\KJV_BIBLE_SDBM_528_31102_7500.dat";
sysopen( my $in, $datfile, O_RDONLY ) or die "Error on open input [$datfile]: $!\n";

my $size = sysseek( $in, 0, SEEK_END );    #-- bottom of file
defined $size or die "Error on seek to end of [$datfile]: $!\n";
print {$out} "File Size: " . $size . "\n";

my %BibleIDX;
unless ( tie( %BibleIDX, "SDBM_File", 'KJV_Bible_SDBM_528_31102_7500', O_RDONLY, 0444 ) ) {    #-- (.pag, .dir)
    print "Error tying SDBM hash files: [$cwd\\KJV_Bible_SDBM_528_31102_7500(.pag && .dir)]\n";
    sleep 5;
    die;
}

print {$out} "INDEX FILE STATS: ", $BibleIDX{"STATS"}, "\n";

#-- chapter entry: "<byte offset of first verse>,<number of verses>"
my $entry = $BibleIDX{$key};
defined $entry or die "No index entry for key [$key]\n";
my ( $offset, $nbr_verses ) = split( /,/, $entry );
defined $offset && defined $nbr_verses && $offset =~ /\A\d+\z/ && $nbr_verses =~ /\A\d+\z/
    or die "Malformed index entry for key [$key]: [$entry]\n";

#-- book entry: "<name>,<abbrev>,<chapters>"; it does not change per verse, so
#-- look it up once instead of on every loop iteration
my $book_entry = $BibleIDX{$bk};
my ( $name, $abbrev ) = split( /,/, defined $book_entry ? $book_entry : '' );
$name   = '' unless defined $name;
$abbrev = '' unless defined $abbrev;

#-- position on the first verse of the chapter; later reads are sequential
if ( $nbr_verses >= 1 ) {
    my $pos = sysseek( $in, $offset, SEEK_SET );
    defined $pos or die "Error on seek to offset $offset in [$datfile]: $!\n";
    my $tell = sysseek( $in, 0, SEEK_CUR );    #-- 0 bytes from current position
    print {$out} "pos=($pos), tell=($tell)\n\n";
}

for my $j ( 1 .. $nbr_verses ) {
    my $tell = $offset + ( $j - 1 ) * RECORD_LEN;    #-- byte offset of this record

    my $rec;
    my $got = sysread( $in, $rec, RECORD_LEN );    #-- or we could read (528 * $nbr_verses) and unpack to an array
    defined $got
        or die "Error on read input at offset $tell: $!\n";
    $got == RECORD_LEN
        or die "Short read at offset $tell: $got of " . RECORD_LEN . " bytes\n";

    ( my $verse = $rec ) =~ s/\s+\z//;    #-- strip the record's trailing space padding

    print {$out} $tell . "|" . $tr . "|" . $bk . "|" . $name . "|" . $abbrev . "|"
        . $chp . ":" . $j . "|" . $verse . "\n\n";
}

close($in);
close($out) or die "Error on close output [$outfile]: $!\n";
exit;

END {
    untie(%BibleIDX);
    print "BYE\n";
    sleep 3;    #-- keep the console window open long enough to read
}