1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
# Return the entire contents of file $f as a single string.
#
# The file is opened with the default channel settings (no explicit
# encoding or translation is configured here). An error is raised if the
# file cannot be opened or read.
#
# The channel is always closed before returning, including when [read]
# fails, so that a failed load does not leak an open channel.
proc loadfile {f} {
  set fd [open $f]

  # Trap a read failure so the channel can be closed before the error is
  # passed on to the caller with its original message and options.
  set failed [catch {read $fd} data opts]
  close $fd
  if {$failed} {
    return -options $opts $data
  }
  return $data
}
# Progress reporting state used by load_hierachy:
#   ::nRowPerDot - one "." is written to stdout each time this many rows
#                  have been inserted.
#   ::nRow       - running count of rows inserted so far.
set ::nRowPerDot 1000
set ::nRow 0
# Walk the directory tree rooted at $dir and insert one row into table t1
# for every entry that is not a directory. Directories are descended into
# recursively.
#
# Uses the global array O (elements "limit" and "trans") and the global
# counters nRow and nRowPerDot:
#   * When O(limit) is non-zero, scanning stops once nRow reaches it.
#   * When O(trans) is non-zero, after every O(trans) rows the current
#     transaction is committed, an 'integrity-check' is run on t1 and a
#     new transaction is started.
#   * A "." is printed every nRowPerDot rows, with a newline after every
#     65 dots.
proc load_hierachy {dir} {
  global O nRow nRowPerDot

  foreach f [glob -nocomplain -dir $dir *] {
    # The quota is re-checked for every entry, so it also terminates the
    # loops of the enclosing (parent directory) invocations.
    if {$O(limit) && $nRow>=$O(limit)} break

    if {[file isdirectory $f]} {
      load_hierachy $f
      continue
    }

    # $f is referenced by name from within the SQL text; loadfile() is
    # evaluated by the database as an SQL function.
    db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
    incr nRow

    if {$O(trans) && ($nRow % $O(trans))==0} {
      db eval { COMMIT }
      db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
      db eval { BEGIN }
    }

    # Nothing more to do unless this row completes a progress interval.
    if {($nRow % $nRowPerDot)!=0} continue
    puts -nonewline .
    if {($nRow % (65*$nRowPerDot))==0} { puts "" }
    flush stdout
  }
}
# Print the command line synopsis and the list of supported switches to
# stderr, then terminate the process with exit status 1. Never returns.
proc usage {} {
  set lines [list \
    "Usage: $::argv0 ?SWITCHES? DATABASE PATH" \
    "" \
    "Switches are:" \
    " -fts4 (use fts4 instead of fts5)" \
    " -fts5 (use fts5)" \
    " -porter (use porter tokenizer)" \
    " -delete (delete the database file before starting)" \
    " -limit N (load no more than N documents)" \
    " -automerge N (set the automerge parameter to N)" \
    " -crisismerge N (set the crisismerge parameter to N)" \
    " -prefix PREFIX (comma separated prefix= argument)" \
    " -trans N (commit after N inserts - 0 == never)" \
    " -hashsize N (set the fts5 hashsize parameter to N)" \
  ]
  foreach line $lines {
    puts stderr $line
  }
  exit 1
}
# Default settings. Each element of O may be overridden by the command
# line switch of the same name (see [usage]); -fts4/-fts5 select O(vtab)
# and -porter sets O(tok).
set O(vtab) fts5
set O(tok) ""
set O(limit) 0
set O(delete) 0
set O(automerge) -1
set O(crisismerge) -1
set O(prefix) ""
set O(trans) 0
set O(hashsize) -1

# The final two command line words are always DATABASE and PATH; everything
# before them is parsed as switches. Any malformed switch ends in [usage],
# which exits the process.
if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- $arg {
    -fts4 {
      set O(vtab) fts4
    }
    -fts5 {
      set O(vtab) fts5
    }
    -porter {
      set O(tok) ", tokenize=porter"
    }
    -delete {
      set O(delete) 1
    }
    -limit - -trans - -automerge - -crisismerge - -hashsize {
      # Switches taking an integer argument N. The value is stored in the
      # element of O named after the switch (without the leading "-").
      # It is validated here because these values are later used in
      # arithmetic expressions, where a non-integer would only fail once
      # the load is already under way.
      if { [incr i]>=$nOpt } usage
      set key [string range $arg 1 end]
      set O($key) [lindex $argv $i]
      if {![string is integer -strict $O($key)]} {
        puts stderr "$arg: expected an integer but got \"$O($key)\""
        usage
      }
    }
    -prefix {
      if { [incr i]>=$nOpt } usage
      set O(prefix) [lindex $argv $i]
    }
    default {
      usage
    }
  }
}
# Open the database named by the second-to-last command line word,
# deleting any existing file first when -delete was given.
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile

# Best effort: any error from this command is ignored.
# NOTE(review): presumably it is only available in some builds - confirm.
catch { load_static_extension db fts5 }

# Expose the Tcl proc [loadfile] to SQL as the function loadfile().
db func loadfile loadfile
db eval "PRAGMA page_size=4096"
db eval BEGIN

# Create the full-text table. Errors from either statement are deliberately
# ignored (NOTE(review): presumably so that an already existing table can
# be reused, and because 'pgsz' may not apply to every table type).
set pref ""
if {$O(prefix) ne ""} { set pref ", prefix='$O(prefix)'" }
catch {
  db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
  db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
}

# Optional tuning parameters; a negative value means "not specified".
# The values are bound as SQL variables rather than spliced into the SQL
# text, matching how automerge and crisismerge are passed below.
if {$O(hashsize)>=0} {
  catch {
    db eval { INSERT INTO t1(t1, rank) VALUES('hashsize', $O(hashsize)) }
  }
}
if {$O(automerge)>=0} {
  if {$O(vtab) eq "fts5"} {
    db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
  } else {
    db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
  }
}
# crisismerge is only applied to fts5 tables; for any other table type the
# setting is ignored.
if {$O(crisismerge)>=0 && $O(vtab) eq "fts5"} {
  db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
}

# Load every file below PATH (the last command line word), commit the
# transaction that is still open, and terminate the line of progress dots.
load_hierachy [lindex $argv end]
db eval COMMIT
puts ""
|