File: clean_keydb.ml

package info (click to toggle)
sks 1.1.6-14
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 2,296 kB
  • sloc: ml: 15,228; ansic: 1,069; sh: 358; makefile: 347
file content (374 lines) | stat: -rw-r--r-- 13,079 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
(***********************************************************************)
(* clean_keydb.ml - Executable: Cleans up various problems that occur  *)
(*                  in key databases                                   *)
(*                                                                     *)
(* Currently, this includes:                                           *)
(* - Merging all mergeable keys                                        *)
(* - Eliminating keys with unparseable packet sequences                *)
(* - Eliminating duplicates                                            *)
(*   (Note, this doesn't get rid of ALL duplicates, for instance, if   *)
(*    the same signature is used to sign two different keys, it is not *)
(*    removed. Removal is only done if it leaves a reasonable packet   *)
(*    structure in place.)                                             *)
(*                                                                     *)
(* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, *)
(*               2011, 2012, 2013  Yaron Minsky and Contributors       *)
(*                                                                     *)
(* This file is part of SKS.  SKS is free software; you can            *)
(* redistribute it and/or modify it under the terms of the GNU General *)
(* Public License as published by the Free Software Foundation; either *)
(* version 2 of the License, or (at your option) any later version.    *)
(*                                                                     *)
(* This program is distributed in the hope that it will be useful, but *)
(* WITHOUT ANY WARRANTY; without even the implied warranty of          *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   *)
(* General Public License for more details.                            *)
(*                                                                     *)
(* You should have received a copy of the GNU General Public License   *)
(* along with this program; if not, write to the Free Software         *)
(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)
(* USA or see <http://www.gnu.org/licenses/>.                          *)
(***********************************************************************)

module F(M:sig end) =
struct
  open StdLabels
  open MoreLabels
  open Printf
  open Arg
  open Common
  module Set = PSet.Set
  module Map = PMap.Map
  module Unix = UnixLabels
  open Packet
  open Bdb

  let settings = {
    Keydb.withtxn = !Settings.transactions;
    Keydb.cache_bytes = !Settings.cache_bytes;
    Keydb.pagesize = !Settings.pagesize;
    Keydb.keyid_pagesize = !Settings.keyid_pagesize;
    Keydb.meta_pagesize = !Settings.meta_pagesize;
    Keydb.subkeyid_pagesize = !Settings.subkeyid_pagesize;
    Keydb.time_pagesize = !Settings.time_pagesize;
    Keydb.tqueue_pagesize = !Settings.tqueue_pagesize;
    Keydb.word_pagesize = !Settings.word_pagesize;
    Keydb.dbdir = Lazy.force Settings.dbdir;
    Keydb.dumpdir = Lazy.force Settings.dumpdir;
  }

  (** we need full keydb access because we're playing directly with
    databases and cursors and such
  *)
  module Keydb = Keydb.Unsafe


  let ( |= ) map key = Map.find key map
  let ( |< ) map (key,data) = Map.add ~key ~data map

  let ctr = ref 0
  let tick () =
    incr ctr;
    if !ctr mod 10000 = 0 then
      perror "%d thousand steps processed" (!ctr/1000)


  type action = Delete of key | Swap of (key * key)

  let do_action action = match action with
    | Swap (key1,key2) -> Keydb.swap_keys key1 key2
    | Delete key -> Keydb.delete_key key

  let do_opt f opt = match opt with
    | None -> ()
    | Some x -> f x

  (** Canonicalize a key if it is required.  This assumes that the given
    key is actually in the database *)
  let canonicalize_key key =
    try
      let ckey = Fixkey.canonicalize key in
      if KeyHash.hash ckey <> KeyHash.hash key then
        begin
          perror "Swap found: %s -> %s"
            (KeyHash.hexify (KeyHash.hash key))
            (KeyHash.hexify (KeyHash.hash ckey));
          Some (Swap (key,ckey))
        end
      else None
    with
        Fixkey.Bad_key ->
          perror "Key to be deleted: %s" (KeyHash.hexify (KeyHash.hash key));
          Some (Delete key)


  let at_once = match !Settings.n with
      0 -> 10000
    | n -> n

  let canonicalize_indirect () =
    ctr := 0;
    perror "Starting indirect canonicalization";

    let dbs = Keydb.get_dbs () in
    let filearray = dbs.Keydb.dump.Keydb.filearray in

    let actions = ref [] in
    let num_actions = ref 0 in

    let filter_actions actions =
      let actions = List.map actions
                      ~f:(function
                            | Delete key as action ->
                                (KeyHash.hash key, action)
                            | Swap (key1,key2) as action ->
                                (KeyHash.hash key1, action)
                         )
      in
      let actions = List.sort ~cmp:compare actions in
      let actions = List.filter actions
                      ~f:(fun (hash,action) -> Keydb.has_hash hash)
      in
      List.map ~f:(fun (hash,action) -> action) actions
    in

    let run_stored_actions () =
      let filt_actions = filter_actions !actions in
      perror "doing %d out of %d update actions"
        (List.length filt_actions) (List.length !actions);
      let dbactions =
        List.fold_left ~init:[] filt_actions
          ~f:(fun list action -> match action with
                  Delete key ->
                    (Keydb.key_to_metadata key, Keydb.DeleteKey)::list
                | Swap (key1,key2) ->
                    (Keydb.key_to_metadata key1, Keydb.DeleteKey)::
                    (Keydb.key_to_metadata key2, Keydb.AddKey)::list
             )
      in
      Keydb.apply_md_updates (Array.of_list dbactions);
      Keydb.unconditional_checkpoint ();
      actions := [];
      num_actions := 0
    in

    let add_action action =
      actions := action::!actions;
      incr num_actions;

      if !num_actions >= at_once then
        run_stored_actions ()

    in


    Array.iteri filearray
      ~f:(fun i inchan ->
            perror "Starting keydump %d" i;
            seek_in inchan 0;
            let cin = new Channel.sys_in_channel inchan in
            let get = Key.get_of_channel cin in
            try
              while true do
                tick ();
                let key = get () in
                let action = canonicalize_key key in
                do_opt add_action action
              done
            with
                Not_found -> ()
         );

    run_stored_actions ();
    perror "Indirect canonicalization complete"


  (** iterate through the entire database, replacing all non-canonical keys
    with canonicalized versions.  Delete all non-canonicalizable keys.  Only
    work on keys stored directly in the database.  Keys stored indirectly
    will be fixed by scanning the initial keydump.

    Note that this is not nearly so highly-optimized as canonicalize_indirect.
    However, for most keyservers, most of the keys will be in the indirect
    keydump anyway.
  *)
  let canonicalize_direct () =
    ctr := 0;
    perror "Starting direct canonicalization";
    let clean ~hash ~keystr =
      let skey = Keydb.skey_of_string keystr in
      if not (Keydb.skey_is_offset skey) then
        let key = Keydb.key_of_skey skey in
        tick ();
        (* ignore offsets, they're handled elsewhere *)
        do_opt do_action (canonicalize_key key)
    in
    Keydb.raw_iter clean;
    perror "Direct canonicalization complete"

  let canonicalize () =
    canonicalize_indirect ();
    canonicalize_direct ()

  (***************************************************************)
  (***************************************************************)
  (***************************************************************)

  (** internal function: retrieves list of (key,data) duplicates for a given
    cursor *)
  let rec get_dups_rec cursor accum =
    try
      let (key,data) = Cursor.get cursor Cursor.NEXT_DUP [] in
      get_dups_rec cursor ((key,data)::accum)
    with
        Not_found -> accum

  (** returns pair of key and duplicate data for the given cursor *)
  let get_dups cursor =
    let pairs = get_dups_rec cursor [] in
    match pairs with
        [] -> failwith "get_dups retrieved empty list"
      | (key,data)::tail ->
          let dtail = List.map tail
                        ~f:(fun (tkey,tdata) -> if tkey <> key
                            then failwith "get_dups retrieved non-duplicate"
                            else tdata
                           )
          in
          (key,data::dtail)

  (** checks if a sorted list has duplicates *)
  let rec has_dups list = match list with
      [] -> false
    | [hd] -> false
    | hd1::hd2::tl ->
        if hd1 = hd2 then true
        else has_dups (hd2::tl)

  (** merges keys given the key hashes.  The [keyid] argument is there just to
    make logging more understandable *)
  let merge_from_hashes keyid hashes =
    (* Sort hashes and remove duplicates, if any *)
    let hashes = List.sort ~cmp:compare hashes in
    let hashes =
      if has_dups hashes then (
        perror "Duplicates found in hash list";
        MList.dedup hashes
      ) else hashes
    in

    (** fetches a key from its hash *)
    let key_from_hash hash =
      try
        let key = Keydb.get_by_hash hash in
        let newhash = KeyHash.hash key in
        if newhash <> hash then
          perror "Key hashes do not match up:\n\trequested: %s\n\tfound: %s"
                     (KeyHash.hexify hash) (KeyHash.hexify newhash);
        Some key
      with
          Not_found ->
            perror "Database corruption: Key matched up to keyid not found in database:\n\tkeyid: %s\n\thash: %s"
            (Fingerprint.keyid_to_string keyid) (KeyHash.hexify hash);
            None
    in
    let keys = strip_opt (List.map ~f:key_from_hash hashes) in
    (* compute the list of replacements and apply them *)
    let replacements = Fixkey.compute_merge_replacements keys in
    if List.length replacements > 0
    then perror "%d replacements found" (List.length replacements);
    List.iter replacements
      ~f:(fun (delete_list,newkey) ->
            perror "replacing %d keys with single merged key"
            (List.length delete_list);
            List.iter delete_list
              ~f:(fun key -> perror "removing: %s"
                    (KeyHash.hexify (KeyHash.hash key)));
            perror "adding: %s"
              (KeyHash.hexify (KeyHash.hash newkey));
            Keydb.replace delete_list newkey;
            perror "Transaction complete"
         )





  (** find all sets of key with the same keyid and merge them if possible *)
  let merge () =
    ctr := 0;

    perror "Starting key merge";
    let dbs = Keydb.get_dbs () in
    let c = Cursor.create dbs.Keydb.keyid in

    let (first_keyid,first_hash) = Cursor.get c Cursor.FIRST [] in

    let finished = ref false
    and keyid = ref first_keyid
    and hash = ref first_hash
    in
    while not !finished do
      tick ();
      if Cursor.count c > 1 then (
        let (dup_keyid,hashes) = get_dups c in
        if dup_keyid <> !keyid then failwith "Failure retrieving duplicates";
        let hashes = !hash::hashes in
        perror "%s" ("Multiple keys found with same ID.  " ^
                     "merge_from_hashes called");
        List.iter hashes
          ~f:(fun hash -> perror "Hash: %s" (KeyHash.hexify hash));
        merge_from_hashes !keyid hashes
      );
      try
        let (new_keyid,new_hash) = Cursor.get c Cursor.NEXT [] in
        keyid := new_keyid;
        hash := new_hash
      with
          Not_found -> finished := true
    done;
    perror "Completed key merge"


  (** Run filters that are not already contained in [applied_filters] *)
  let run applied_filters =

    (* only do canonicalize if it's necessary *)
    if not (List.mem "yminsky.dedup" applied_filters) then (
      perror "Deduping keys in database";
      canonicalize ();
      Keydb.set_meta ~key:"filters" ~data:"yminsky.dedup";
      Keydb.unconditional_checkpoint ();
    ) else perror "Database already deduped";


    (* note: if dedup was done, merge should be done again *)
    if not (List.mem "yminsky.dedup" applied_filters)
      || not (List.mem "yminsky.merge" applied_filters)
    then (
      perror "Merging keys in database";
      merge ();
      Keydb.set_meta ~key:"filters" ~data:"yminsky.dedup,yminsky.merge";
      Keydb.unconditional_checkpoint ();
    )
    else perror "Database already merged"



  let comma = Str.regexp ","

  let run () =
    set_logfile "clean";
        perror "Running SKS %s%s" Common.version Common.version_suffix;
    Keydb.open_dbs settings;
    perror "Keydb opened";

    let applied_filters =
      try Str.split comma (Keydb.get_meta "filters")
      with Not_found -> []
    in
    run applied_filters;

    Keydb.close_dbs ()

end