File: checksum.ml

package info (click to toggle)
unison-2.51%2B4.11.1 2.51.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 4,636 kB
  • sloc: ml: 34,056; objc: 3,605; ansic: 1,400; makefile: 722; python: 430; sh: 80
file content (73 lines) | stat: -rw-r--r-- 2,990 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
(* Unison file synchronizer: src/checksum.ml *)
(* Copyright 1999-2020, Benjamin C. Pierce

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*)


(* The checksum (or fast fingerprinting) algorithm must be fast and has to   *)
(* be called in a rolling fashion (i.e. we must be able to calculate a new   *)
(* checksum when provided the current checksum, the outgoing character and   *)
(* the incoming one).                                                        *)

(* Definition: cksum([c_n, c_{n-1}, ..., c_0]) = Sum c_i * 16381 ^ i         *)

type t = int

type u = int array

(* [power v n] computes [v ^ n]                                              *)
let rec power v n =
  if n = 0 then 1 else
  let v' = power v (n / 2) in
  let v'' = v' * v' in
  if n land 1 <> 0 then v * v'' else v''

(* Takes the block length and returns a pre-computed table for the function  *)
(* roll: If [init l] => I, then I_n = n * 16381 ^ (l + 1), for 0 <= n < 256  *)
(* NB: 256 is the upper-bound of ASCII code returned by Char.code            *)

let init l =
  let p = power 16381 (l + 1) in
  Array.init 256 (fun i -> (i * p) land 0x7fffffff)

(* Function [roll] computes fixed-length checksum from previous checksum     *)
(* Roughly: given the pre-computed table, compute the new checksum from the  *)
(* old one along with the outgoing and incoming characters, i.e.,            *)
(*                                                                         - *)
(* [roll cksum([c_n, ..., c_0]) c_n c'] => cksum([c_{n-1}, ..., c_0, c'])    *)
(*                                                                         - *)
let roll init cksum cout cin =
  let v = cksum + Char.code cin in
  (v lsl 14 - (v + v + v) (* v * 16381 *)
    - Array.unsafe_get init (Char.code cout)) land 0x7fffffff

(* Function [substring] computes checksum for a given substring in one batch *)
(* process: [substring s p l] => cksum([s_p, ..., s_{p + l - 1}])            *)

let substring s p l =
  let cksum = ref 0 in
  for i = p to p + l - 1 do
    let v = !cksum + Char.code (String.unsafe_get s i) in
    cksum := (v lsl 14 - (v + v + v)) (* v * 16381 *)
  done;
  !cksum land 0x7fffffff

let subbytes s p l =
  let cksum = ref 0 in
  for i = p to p + l - 1 do
    let v = !cksum + Char.code (Bytes.unsafe_get s i) in
    cksum := (v lsl 14 - (v + v + v)) (* v * 16381 *)
  done;
  !cksum land 0x7fffffff