File: test_missing_ancestors.rs

package info (click to toggle)
mercurial 6.3.2-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 42,052 kB
  • sloc: python: 199,820; ansic: 46,300; tcl: 3,715; sh: 1,676; lisp: 1,483; cpp: 864; javascript: 649; makefile: 626; xml: 36; sql: 30
file content (337 lines) | stat: -rw-r--r-- 11,337 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
use hg::testing::VecGraph;
use hg::Revision;
use hg::*;
use rand::distributions::{Distribution, Uniform};
use rand::{thread_rng, Rng, RngCore, SeedableRng};
use rand_distr::LogNormal;
use std::cmp::min;
use std::collections::HashSet;
use std::env;
use std::fmt::Debug;

fn build_random_graph(
    nodes_opt: Option<usize>,
    rootprob_opt: Option<f64>,
    mergeprob_opt: Option<f64>,
    prevprob_opt: Option<f64>,
) -> VecGraph {
    let nodes = nodes_opt.unwrap_or(100);
    let rootprob = rootprob_opt.unwrap_or(0.05);
    let mergeprob = mergeprob_opt.unwrap_or(0.2);
    let prevprob = prevprob_opt.unwrap_or(0.7);

    let mut rng = thread_rng();
    let mut vg: VecGraph = Vec::with_capacity(nodes);
    for i in 0..nodes {
        if i == 0 || rng.gen_bool(rootprob) {
            vg.push([NULL_REVISION, NULL_REVISION])
        } else if i == 1 {
            vg.push([0, NULL_REVISION])
        } else if rng.gen_bool(mergeprob) {
            let p1 = {
                if i == 2 || rng.gen_bool(prevprob) {
                    (i - 1) as Revision
                } else {
                    rng.gen_range(0..i - 1) as Revision
                }
            };
            // p2 is a random revision lower than i and different from p1
            let mut p2 = rng.gen_range(0..i - 1) as Revision;
            if p2 >= p1 {
                p2 = p2 + 1;
            }
            vg.push([p1, p2]);
        } else if rng.gen_bool(prevprob) {
            vg.push([(i - 1) as Revision, NULL_REVISION])
        } else {
            vg.push([rng.gen_range(0..i - 1) as Revision, NULL_REVISION])
        }
    }
    vg
}

/// Compute the ancestors set of all revisions of a VecGraph
fn ancestors_sets(vg: &VecGraph) -> Vec<HashSet<Revision>> {
    let mut ancs: Vec<HashSet<Revision>> = Vec::new();
    for i in 0..vg.len() {
        let mut ancs_i = HashSet::new();
        ancs_i.insert(i as Revision);
        for p in vg[i].iter().cloned() {
            if p != NULL_REVISION {
                ancs_i.extend(&ancs[p as usize]);
            }
        }
        ancs.push(ancs_i);
    }
    ancs
}

#[derive(Clone, Debug)]
enum MissingAncestorsAction {
    InitialBases(HashSet<Revision>),
    AddBases(HashSet<Revision>),
    RemoveAncestorsFrom(HashSet<Revision>),
    MissingAncestors(HashSet<Revision>),
}

/// An instrumented naive yet obviously correct implementation
///
/// It also records all its actions for easy reproduction for replay
/// of problematic cases
struct NaiveMissingAncestors<'a> {
    ancestors_sets: &'a Vec<HashSet<Revision>>,
    graph: &'a VecGraph, // used for error reporting only
    bases: HashSet<Revision>,
    history: Vec<MissingAncestorsAction>,
    // for error reporting, assuming we are in a random test
    random_seed: String,
}

impl<'a> NaiveMissingAncestors<'a> {
    fn new(
        graph: &'a VecGraph,
        ancestors_sets: &'a Vec<HashSet<Revision>>,
        bases: &HashSet<Revision>,
        random_seed: &str,
    ) -> Self {
        Self {
            ancestors_sets: ancestors_sets,
            bases: bases.clone(),
            graph: graph,
            history: vec![MissingAncestorsAction::InitialBases(bases.clone())],
            random_seed: random_seed.into(),
        }
    }

    fn add_bases(&mut self, new_bases: HashSet<Revision>) {
        self.bases.extend(&new_bases);
        self.history
            .push(MissingAncestorsAction::AddBases(new_bases))
    }

    fn remove_ancestors_from(&mut self, revs: &mut HashSet<Revision>) {
        revs.remove(&NULL_REVISION);
        self.history
            .push(MissingAncestorsAction::RemoveAncestorsFrom(revs.clone()));
        for base in self.bases.iter().cloned() {
            if base != NULL_REVISION {
                for rev in &self.ancestors_sets[base as usize] {
                    revs.remove(&rev);
                }
            }
        }
    }

    fn missing_ancestors(
        &mut self,
        revs: impl IntoIterator<Item = Revision>,
    ) -> Vec<Revision> {
        let revs_as_set: HashSet<Revision> = revs.into_iter().collect();

        let mut missing: HashSet<Revision> = HashSet::new();
        for rev in revs_as_set.iter().cloned() {
            if rev != NULL_REVISION {
                missing.extend(&self.ancestors_sets[rev as usize])
            }
        }
        self.history
            .push(MissingAncestorsAction::MissingAncestors(revs_as_set));

        for base in self.bases.iter().cloned() {
            if base != NULL_REVISION {
                for rev in &self.ancestors_sets[base as usize] {
                    missing.remove(&rev);
                }
            }
        }
        let mut res: Vec<Revision> = missing.iter().cloned().collect();
        res.sort();
        res
    }

    fn assert_eq<T>(&self, left: T, right: T)
    where
        T: PartialEq + Debug,
    {
        if left == right {
            return;
        }
        panic!(
            "Equality assertion failed (left != right)
                left={:?}
                right={:?}
                graph={:?}
                current bases={:?}
                history={:?}
                random seed={}
            ",
            left,
            right,
            self.graph,
            self.bases,
            self.history,
            self.random_seed,
        );
    }
}

/// Choose a set of random revisions
///
/// The size of the set is taken from a LogNormal distribution
/// with default mu=1.1 and default sigma=0.8. Quoting the Python
/// test this is taken from:
///     the default mu and sigma give us a nice distribution of mostly
///     single-digit counts (including 0) with some higher ones
/// The sample may include NULL_REVISION
fn sample_revs<R: RngCore>(
    rng: &mut R,
    maxrev: Revision,
    mu_opt: Option<f64>,
    sigma_opt: Option<f64>,
) -> HashSet<Revision> {
    let mu = mu_opt.unwrap_or(1.1);
    let sigma = sigma_opt.unwrap_or(0.8);

    let log_normal = LogNormal::new(mu, sigma).unwrap();
    let nb = min(maxrev as usize, log_normal.sample(rng).floor() as usize);

    let dist = Uniform::from(NULL_REVISION..maxrev);
    return rng.sample_iter(&dist).take(nb).collect();
}

/// Produces the hexadecimal representation of a slice of bytes
fn hex_bytes(bytes: &[u8]) -> String {
    let mut s = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        s.push_str(&format!("{:x}", b));
    }
    s
}

/// Fill a random seed from its hexadecimal representation.
///
/// This signature is meant to be consistent with `RngCore::fill_bytes`
fn seed_parse_in(hex: &str, seed: &mut [u8]) {
    if hex.len() != 32 {
        panic!("Seed {} is too short for 128 bits hex", hex);
    }
    for i in 0..8 {
        seed[i] = u8::from_str_radix(&hex[2 * i..2 * (i + 1)], 16)
            .unwrap_or_else(|_e| panic!("Seed {} is not 128 bits hex", hex));
    }
}

/// Parse the parameters for `test_missing_ancestors()`
///
/// Returns (graphs, instances, calls per instance)
fn parse_test_missing_ancestors_params(var: &str) -> (usize, usize, usize) {
    let err_msg = "TEST_MISSING_ANCESTORS format: GRAPHS,INSTANCES,CALLS";
    let params: Vec<usize> = var
        .split(',')
        .map(|n| n.trim().parse().expect(err_msg))
        .collect();
    if params.len() != 3 {
        panic!("{}", err_msg);
    }
    (params[0], params[1], params[2])
}

#[test]
/// This test creates lots of random VecGraphs,
/// and compare a bunch of MissingAncestors for them with
/// NaiveMissingAncestors that rely on precomputed transitive closures of
/// these VecGraphs (ancestors_sets).
///
/// For each generater graph, several instances of `MissingAncestors` are
/// created, whose methods are called and checked a given number of times.
///
/// This test can be parametrized by two environment variables:
///
/// - TEST_RANDOM_SEED: must be 128 bits in hexadecimal
/// - TEST_MISSING_ANCESTORS: "GRAPHS,INSTANCES,CALLS". The default is
///   "100,10,10"
///
/// This is slow: it runs on my workstation in about 5 seconds with the
/// default parameters with a plain `cargo --test`.
///
/// If you want to run it faster, especially if you're changing the
/// parameters, use `cargo test --release`.
/// For me, that gets it down to 0.15 seconds with the default parameters
fn test_missing_ancestors_compare_naive() {
    let (graphcount, testcount, inccount) =
        match env::var("TEST_MISSING_ANCESTORS") {
            Err(env::VarError::NotPresent) => (100, 10, 10),
            Ok(val) => parse_test_missing_ancestors_params(&val),
            Err(env::VarError::NotUnicode(_)) => {
                panic!("TEST_MISSING_ANCESTORS is invalid");
            }
        };
    let mut seed: [u8; 16] = [0; 16];
    match env::var("TEST_RANDOM_SEED") {
        Ok(val) => {
            seed_parse_in(&val, &mut seed);
        }
        Err(env::VarError::NotPresent) => {
            thread_rng().fill_bytes(&mut seed);
        }
        Err(env::VarError::NotUnicode(_)) => {
            panic!("TEST_RANDOM_SEED must be 128 bits in hex");
        }
    }
    let hex_seed = hex_bytes(&seed);
    eprintln!("Random seed: {}", hex_seed);

    let mut rng = rand_pcg::Pcg32::from_seed(seed);

    eprint!("Checking MissingAncestors against brute force implementation ");
    eprint!("for {} random graphs, ", graphcount);
    eprintln!(
        "with {} instances for each and {} calls per instance",
        testcount, inccount,
    );
    for g in 0..graphcount {
        if g != 0 && g % 100 == 0 {
            eprintln!("Tested with {} graphs", g);
        }
        let graph = build_random_graph(None, None, None, None);
        let graph_len = graph.len() as Revision;
        let ancestors_sets = ancestors_sets(&graph);
        for _testno in 0..testcount {
            let bases: HashSet<Revision> =
                sample_revs(&mut rng, graph_len, None, None);
            let mut inc = MissingAncestors::<VecGraph>::new(
                graph.clone(),
                bases.clone(),
            );
            let mut naive = NaiveMissingAncestors::new(
                &graph,
                &ancestors_sets,
                &bases,
                &hex_seed,
            );
            for _m in 0..inccount {
                if rng.gen_bool(0.2) {
                    let new_bases =
                        sample_revs(&mut rng, graph_len, None, None);
                    inc.add_bases(new_bases.iter().cloned());
                    naive.add_bases(new_bases);
                }
                if rng.gen_bool(0.4) {
                    // larger set so that there are more revs to remove from
                    let mut hrevs =
                        sample_revs(&mut rng, graph_len, Some(1.5), None);
                    let mut rrevs = hrevs.clone();
                    inc.remove_ancestors_from(&mut hrevs).unwrap();
                    naive.remove_ancestors_from(&mut rrevs);
                    naive.assert_eq(hrevs, rrevs);
                } else {
                    let revs = sample_revs(&mut rng, graph_len, None, None);
                    let hm =
                        inc.missing_ancestors(revs.iter().cloned()).unwrap();
                    let rm = naive.missing_ancestors(revs.iter().cloned());
                    naive.assert_eq(hm, rm);
                }
            }
        }
    }
}