1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
|
(************************************************************************)
(* This file is part of SKS. SKS is free software; you can
redistribute it and/or modify it under the terms of the GNU General
Public License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA *)
(***********************************************************************)
open Common
open StdLabels
open MoreLabels
open Printf
(*open Pstyle *)
module Set = PSet.Set
open ReconPTreeDb
(*
#directory "/home/yminsky/Work/projects/keyserver/sks"
#load "reconPTreeDb.cmo"
*)
let rec read_lines f accum =
let line =
try Some (input_line f)
with End_of_file -> None
in
match line with
Some line -> read_lines f (line::accum)
| None -> List.rev accum
let read_lines f = read_lines f []
let entry_hash entry = match entry with
| Add hash -> hash
| Delete hash -> hash
let ch_piece ch pos line =
if pos >= String.length line then raise Not_found;
try
let newpos = String.index_from line pos ch in
(newpos+1,
String.sub line ~pos ~len:(newpos - pos))
with
Not_found -> (String.length line,
String.sub line ~pos ~len:(String.length line - pos))
let rec ch_pieces ch pos line =
let (newpos,piece) = ch_piece ch pos line in
try piece::(ch_pieces ch newpos line)
with Not_found -> piece::[]
let ws = Str.regexp " "
let line_to_entry line =
let pieces = Array.of_list (ch_pieces ' ' 0 line) in
let hash = KeyHash.dehexify pieces.(3) in
match pieces.(2) with
| "Add" -> Add hash
| "Del" -> Delete hash
| _ -> failwith "unparseable line"
(** compute the symmetric difference between two arrays
sorted in increasing order
*)
let array_diff a1 a2 =
let c1 = ref 0 and c2 = ref 0 in
let diff1 = ref [] and diff2 = ref [] in
let add1 () =
diff1 := a1.(!c1)::!diff1;
incr c1
and add2 () =
diff2 := a2.(!c2)::!diff2;
incr c2
in
while !c1 < Array.length a1 || !c2 < Array.length a2 do
if !c1 >= Array.length a1 then add2 ()
else if !c2 >= Array.length a2 then add1 ()
else if a1.(!c1) = a2.(!c2) then ( incr c1; incr c2; )
else if a1.(!c1) < a2.(!c2) then add1 ()
else add2 ()
done;
(List.rev !diff1,List.rev !diff2)
let rec read_entries f accum =
let line =
try Some (input_line f)
with End_of_file -> None
in
match line with
Some line -> read_entries f (line_to_entry line::accum)
| None -> Array.of_list (List.rev accum)
let read_entries fname =
let f = open_in fname in
let run () =
ignore (input_line f);
read_entries f []
in
protect ~f:run ~finally:(fun () -> close_in f)
let get_entries fname =
let f = open_in fname in
let run () =
let lines = read_lines f in
let lines = Array.of_list lines in
Array.map ~f:line_to_entry lines
in
protect ~f:run ~finally:(fun () -> close_in f)
let zz_of_hstr hstr =
let hash = KeyHash.dehexify hstr in
ZZp.of_bytes hash
let ptree_mem hstr =
let zz = zz_of_hstr hstr in
let rec loop depth =
match (PTree.get_node ~sef:true !ptree zz depth).PTree.children with
| PTree.Children _ -> loop (depth+1)
| PTree.Leaf elements -> Set.mem (ZZp.to_bytes zz) elements
in
loop 0
let rec get_groups entries pos group accum =
if pos >= Array.length entries then
if group = [] then accum
else group::accum
else (
match group with
| [] -> get_groups entries (pos+1) [entries.(pos)] accum
| group_hd::_ ->
if entry_hash entries.(pos) = entry_hash group_hd
then get_groups entries (pos+1) (entries.(pos)::group) accum
else get_groups entries (pos+1) [entries.(pos)] (group::accum)
)
let get_groups entries = get_groups entries 0 [] []
let rec last list = match list with
[hd] -> hd
| hd::tl -> last tl
| [] -> raise Not_found
let simplify_groups groups =
Array.of_list (List.rev_map ~f:last groups)
let bad_entry entry = match entry with
| Add hash -> if ptree_mem hash then false else true
| Delete hash -> if ptree_mem hash then true else false
let trunc s = String.sub ~pos:0 ~len:16 s
let get_ptree_hashes () =
PTree.summarize_tree
~lagg:(fun set -> Array.map ~f:trunc
(Array.of_list (Set.elements set)))
~cagg:(fun alist -> Array.concat (Array.to_list alist))
!ptree
let lpush el lref = lref := el::!lref
let get_entry_droplist entries =
let droplist = ref [] in
for i = 0 to Array.length entries - 2 do
if entry_hash entries.(i) = entry_hash entries.(i+1) then
lpush i droplist
done;
List.rev !droplist
let dedup_entries entries =
let droplist = get_entry_droplist entries in
let drops = Set.of_list droplist in
let new_entries = Array.make (Array.length entries - List.length droplist)
entries.(0)
in
let pos = ref 0 in
for i = 0 to Array.length entries - 1 do
if not (Set.mem i drops) then (
new_entries.(!pos) <- entries.(i);
incr pos
)
done;
new_entries
let get_simplified_entries fname =
perror "reading entries from log";
let entries = read_entries fname in
perror "sorting log entries";
Array.stable_sort entries
~cmp:(fun x y -> compare (entry_hash x) (entry_hash y));
perror "deduping log entries";
dedup_entries entries
let count_adds entries =
Array.fold_left ~init:0 entries
~f:(fun count entry -> match entry with
Add hash -> count + 1
| _ -> count)
let get_hashes simplified_entries =
perror "extracting adds";
let adds = count_adds simplified_entries in
let hashes = Array.create adds "" in
let pos = ref 0 in
Array.iter simplified_entries
~f:(function Add hash ->
hashes.(!pos) <- hash; incr pos
| Delete hash -> ());
hashes
let get_diffs () =
let hashes = get_hashes (get_simplified_entries "log.real") in
perror "Getting hashes from prefix tree...";
let phashes = get_ptree_hashes () in
perror "computing difference...";
let (diff1,diff2) = array_diff hashes phashes in
(Set.of_list diff1,Set.of_list diff2)
let rec line_iter ~f file =
let line =
try Some (input_line file)
with End_of_file -> None
in
match line with
| Some line -> f line; line_iter ~f file
| None -> ()
let rewrite_log diff1 diff2 =
let infile = open_in "log.real" in
let outfile = open_out "log.real.annot" in
output_string outfile (input_line infile);
output_string outfile "\n";
line_iter infile
~f:(fun line ->
output_string outfile line;
let entry = line_to_entry line in
if Set.mem (entry_hash entry) diff1 then
output_string outfile " <--- INLOG"
else if Set.mem (entry_hash entry) diff2 then
output_string outfile " <--- INPTR";
output_string outfile "\n"
);
close_in infile;
close_out outfile
let runtest () =
let (diff1,diff2) = get_diffs () in
perror "Rewriting log";
rewrite_log diff1 diff2
let () = runtest ()
|